其他分享
首页 > 其他分享 > prometheus监控方案(草稿)

prometheus监控方案(草稿)

作者:互联网

一、描述

两台服务器:192.168.11.109、192.168.11.121

第一台计划安装:prometheus、blackbox_exporter、snmp_exporter、consul、grafana、consul-template、thanos query、thanos sidecar、thanos rule

第二台计划安装:prometheus、blackbox_exporter、snmp_exporter、thanos sidecar

二、安装blackbox探针,2台都安装

wget https://github.com/prometheus/blackbox_exporter/releases/download/v0.19.0/blackbox_exporter-0.19.0.linux-amd64.tar.gz

tar -zxvf blackbox_exporter-0.19.0.linux-amd64.tar.gz

mv blackbox_exporter-0.19.0.linux-amd64 /usr/local/blackbox_exporter

cd /usr/local/blackbox_exporter

vim blackbox.yml

(实际我们只用到network_ping)

modules:
  http_2xx:
    prober: http
  http_post_2xx:
    prober: http
    http:
      method: POST
  tcp_connect:
    prober: tcp
  pop3s_banner:
    prober: tcp
    tcp:
      query_response:
      - expect: "^+OK"
      tls: true
      tls_config:
        insecure_skip_verify: false
  ssh_banner:
    prober: tcp
    tcp:
      query_response:
      - expect: "^SSH-2.0-"
  irc_banner:
    prober: tcp
    tcp:
      query_response:
      - send: "NICK prober"
      - send: "USER prober prober prober :prober"
      - expect: "PING :([^ ]+)"
        send: "PONG ${1}"
      - expect: "^:[^ ]+ 001"
  network_ping:
    prober: icmp
    timeout: 5s
    icmp:
      preferred_ip_protocol: "ipv4"
      payload_size: 100
      dont_fragment: true

添加到systemctl服务

vim /etc/systemd/system/blackbox.service

[Unit]
Description=Blackbox
After=network.target

[Service]
User=root
Group=root
Type=simple
Restart=on-failure
ExecStart=/usr/local/blackbox_exporter/blackbox_exporter --config.file=/usr/local/blackbox_exporter/blackbox.yml

[Install]
WantedBy=multi-user.target

服务自启动

systemctl daemon-reload

systemctl enable blackbox

systemctl start blackbox

三、安装snmp探针,2台都安装

wget https://github.com/prometheus/snmp_exporter/releases/download/v0.20.0/snmp_exporter-0.20.0.linux-amd64.tar.gz

tar -zxvf snmp_exporter-0.20.0.linux-amd64.tar.gz

mv snmp_exporter-0.20.0.linux-amd64 /usr/local/snmp_exporter

cd /usr/local/snmp_exporter

vim snmp.yml

(三个module,分别是接口流量、cpu内存、sla侦测)

network_if_table:
  walk:
  - 1.3.6.1.2.1.31.1.1.1.1
  - 1.3.6.1.2.1.31.1.1.1.10
  - 1.3.6.1.2.1.31.1.1.1.6
  - 1.3.6.1.2.1.31.1.1.1.2
  - 1.3.6.1.2.1.31.1.1.1.3
  - 1.3.6.1.2.1.31.1.1.1.4
  - 1.3.6.1.2.1.31.1.1.1.5
  - 1.3.6.1.2.1.31.1.1.1.7
  - 1.3.6.1.2.1.31.1.1.1.11
  - 1.3.6.1.2.1.31.1.1.1.15
  - 1.3.6.1.2.1.31.1.1.1.18
  - 1.3.6.1.2.1.2.2.1.14
  - 1.3.6.1.2.1.2.2.1.13
  - 1.3.6.1.2.1.2.2.1.20
  - 1.3.6.1.2.1.2.2.1.19
  - 1.3.6.1.2.1.2.2.1.7
  - 1.3.6.1.2.1.2.2.1.8

  metrics:
  - name: ifName
    oid: 1.3.6.1.2.1.31.1.1.1.1
    type: DisplayString
    help: The textual name of the interface - 1.3.6.1.2.1.31.1.1.1.1
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
  - name: ifHCOutOctets
    oid: 1.3.6.1.2.1.31.1.1.1.10
    type: counter
    help: The total number of octets transmitted out of the interface, including framing
      characters - 1.3.6.1.2.1.31.1.1.1.10
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifHCInOctets
    oid: 1.3.6.1.2.1.31.1.1.1.6
    type: counter
    help: The total number of octets received on the interface, including framing
      characters - 1.3.6.1.2.1.31.1.1.1.6
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifInMulticastPkts
    oid: 1.3.6.1.2.1.31.1.1.1.2
    type: counter
    help: The total number of multicast packets received on the interface, including framing
      characters - 1.3.6.1.2.1.31.1.1.1.2
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifInBroadcastPkts
    oid: 1.3.6.1.2.1.31.1.1.1.3
    type: counter
    help: The total number of broadcast packets received on the interface, including framing
      characters - 1.3.6.1.2.1.31.1.1.1.3
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifOutMulticastPkts
    oid: 1.3.6.1.2.1.31.1.1.1.4
    type: counter
    help: The total number of multicast packets transmitted out of the interface, including framing
      characters - 1.3.6.1.2.1.31.1.1.1.4
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifOutBroadcastPkts
    oid: 1.3.6.1.2.1.31.1.1.1.5
    type: counter
    help: The total number of broadcast packets transmitted out of the interface, including framing
      characters - 1.3.6.1.2.1.31.1.1.1.5
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifHCInUcastPkts
    oid: 1.3.6.1.2.1.31.1.1.1.7
    type: counter
    help: The total number of unicast packets received on the interface, including framing
      characters - 1.3.6.1.2.1.31.1.1.1.7
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifHCOutUcastPkts
    oid: 1.3.6.1.2.1.31.1.1.1.11
    type: counter
    help: The total number of Unicast packets transmitted out of the interface, including framing
      characters - 1.3.6.1.2.1.31.1.1.1.11
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifHighSpeed
    oid: 1.3.6.1.2.1.31.1.1.1.15
    type: gauge
    help: An estimate of the interface's current bandwidth in units of 1,000,000 bits
      per second - 1.3.6.1.2.1.31.1.1.1.15
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifInDiscards
    oid: 1.3.6.1.2.1.2.2.1.13
    type: counter
    help: The number of inbound packets which were chosen to be discarded even though
      no errors had been detected to prevent their being deliverable to a higher- layer
      protocol - 1.3.6.1.2.1.2.2.1.13
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifInErrors
    oid: 1.3.6.1.2.1.2.2.1.14
    type: counter
    help: For packet- oriented interfaces, the number of inbound packets that contained
      errors preventing them from being deliverable to a higher - layer protocol - 1.3.6.1.2.1.2.2.1.14
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifOutDiscards
    oid: 1.3.6.1.2.1.2.2.1.19
    type: counter
    help: The number of outbound packets which were chosen to be discarded even though
      no errors had been detected to prevent their being transmitted - 1.3.6.1.2.1.2.2.1.19
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifOutErrors
    oid: 1.3.6.1.2.1.2.2.1.20
    type: counter
    help: For packet- oriented interfaces, the number of outbound packets that could
      not be transmitted because of errors - 1.3.6.1.2.1.2.2.1.20
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifOperStatus
    oid: 1.3.6.1.2.1.2.2.1.8
    type: gauge
    help: The current operational state of the interface - 1.3.6.1.2.1.2.2.1.8
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
  - name: ifAdminStatus
    oid: 1.3.6.1.2.1.2.2.1.7
    type: gauge
    help: The desired state of the interface - 1.3.6.1.2.1.2.2.1.7
    indexes:
    - labelname: ifIndex
      type: gauge
    lookups:
    - labels:
      - ifIndex
      labelname: ifAlias
      oid: 1.3.6.1.2.1.31.1.1.1.18
      type: DisplayString
    - labels:
      - ifIndex
      labelname: ifName
      oid: 1.3.6.1.2.1.31.1.1.1.1
      type: DisplayString
 
  version: 2
  auth:
    community: linux
    security_level: noAuthNoPriv
    auth_protocol: MD5
    priv_protocol: DES


#cisco ip sla metrics:
#ipsla_status: 1 success, 4 fail
#ipSlaTag: description in network device's ipsla configuration

network_cisco_ipsla:
  walk:
  - 1.3.6.1.4.1.9.9.42.1.2.10.1.1
  - 1.3.6.1.4.1.9.9.42.1.2.10.1.2
  - 1.3.6.1.4.1.9.9.42.1.2.1.1.3
  metrics:
  - name: ipSlaTag
    oid: 1.3.6.1.4.1.9.9.42.1.2.1.1.3
    type: DisplayString
    help: The tag of ip sla, - 1.3.6.1.4.1.9.9.42.1.2.1.1.3
    indexes:
    - labelname: ipSlaIndex
      type: gauge
  - name: ipsla_status
    oid: 1.3.6.1.4.1.9.9.42.1.2.10.1.2
    type: gauge
    indexes:
    - labelname: ipSlaIndex
      type: gauge
    lookups:
    - labels:
      - ipSlaIndex
      labelname: ipSlaTag
      oid: 1.3.6.1.4.1.9.9.42.1.2.1.1.3
      type: DisplayString
  - name: ipsla_rtt
    oid: 1.3.6.1.4.1.9.9.42.1.2.10.1.1
    type: gauge
    help: The rtt of ip sla, - 1.3.6.1.4.1.9.9.42.1.2.10.1.1
    indexes:
    - labelname: ipSlaIndex
      type: gauge
    lookups:
    - labels:
      - ipSlaIndex
      labelname: ipSlaTag
      oid: 1.3.6.1.4.1.9.9.42.1.2.1.1.3
      type: DisplayString
  version: 2
  auth:
    community: linux
    security_level: noAuthNoPriv
    auth_protocol: MD5
    priv_protocol: DES


network_cisco_process:
  walk:
  - 1.3.6.1.4.1.9.9.109.1.1.1.1.4
  - 1.3.6.1.4.1.9.9.48.1.1.1.6.1
  - 1.3.6.1.4.1.9.9.48.1.1.1.6.2
  - 1.3.6.1.4.1.9.9.48.1.1.1.5.1
  - 1.3.6.1.4.1.9.9.48.1.1.1.5.2 
  metrics:
  - name: CpuUsage1min
    oid: 1.3.6.1.4.1.9.9.109.1.1.1.1.4
    type: gauge
  - name: ProcessorFreeMem
    oid: 1.3.6.1.4.1.9.9.48.1.1.1.6.1
    type: gauge
  - name: IOFreeMem
    oid: 1.3.6.1.4.1.9.9.48.1.1.1.6.2
    type: gauge    
  - name: ProcessorUsedMem
    oid: 1.3.6.1.4.1.9.9.48.1.1.1.5.1
    type: gauge
  - name: IOUsedMem
    oid: 1.3.6.1.4.1.9.9.48.1.1.1.5.2
    type: gauge    
  version: 2
  auth:
    community: linux
    security_level: noAuthNoPriv
    auth_protocol: MD5
    priv_protocol: DES

添加systemctl服务

vim /etc/systemd/system/snmp_exporter.service

[Unit]
Description=snmp_exporter
After=network.target

[Service]
User=root
Group=root
Type=simple
Restart=on-failure
ExecStart=/usr/local/snmp_exporter/snmp_exporter --config.file=/usr/local/snmp_exporter/snmp.yml

[Install]
WantedBy=multi-user.target

服务自启动

systemctl daemon-reload

systemctl enable snmp_exporter

systemctl start snmp_exporter

四、安装consul,仅第一台安装

wget https://releases.hashicorp.com/consul/1.10.1/consul_1.10.1_linux_amd64.zip

unzip consul_1.10.1_linux_amd64.zip

mkdir /usr/local/consul

mv consul /usr/local/consul/

cd /usr/local/consul

新建配置文件路径和数据路径

mkdir conf

mkdir data

vim consul.pid

2008

vim conf/consul-srv.json

{
  "datacenter": "mydc",
  "data_dir": "/usr/local/consul/data",
  "log_level": "info",
  "node_name": "mydc1",
  "server": true,
  "bind_addr": "0.0.0.0",
  "client_addr": "0.0.0.0",
  "advertise_addr": "192.168.11.109",
  "retry_interval": "30s",
  "enable_debug": false,
  "rejoin_after_leave": false,
  "enable_syslog": true,
  "syslog_facility": "local0",
  "ui": true
}

vim conf/port.json

{
  "ports":{
    "http": 8500,
    "dns": 8600,
    "serf_lan": 8301,
    "serf_wan": 8302,
    "server": 8300
  }
}

添加consul服务

vim /etc/systemd/system/consul.service

[Unit]
Description=Consul
Requires=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Environment=GOMAXPROCS=2
Restart=on-failure
ExecStartPre=/usr/bin/rm -f /usr/local/consul/consul.pid
ExecStartPre=/usr/local/consul/consul validate /usr/local/consul/conf/
ExecStart=/usr/local/consul/consul agent -config-dir=/usr/local/consul/conf/ -pid-file=/usr/local/consul/consul.pid
ExecReload=/bin/kill -HUP $MAINPID
KillSignal=SIGTERM
TimeoutStopSec=5
LimitNOFILE=65535

[Install]
WantedBy=multi-user.target

服务自启动

systemctl daemon-reload

systemctl enable consul

systemctl start consul

http://192.168.11.109:8500/ui/mydc/kv

添加规则rules/router_sla和rules/switch_sla


- name: router_sla_fail 
  rules: 
  - alert: router_sla_fail 
    expr: sum_over_time(ipsla_status{device="router", host=~".+?", job="network_cisco_ipsla"}[1m]) >= 48 
    for: 1m 
    labels: 
      severity: minor
      appType: net 
    annotations: 
      realvalue: "{{ $value }}"
      ruleId: 2 
      summary: "{{$labels.idc}}:{{$labels.device}}:{{$labels.host}}:{{$labels.ipSlaTag}}:fail" 
      

- name: switch_sla_fail
  rules: 
  - alert: switch_sla_fail 
    expr: sum_over_time(ipsla_status{device="switch", host=~".+?", job="network_cisco_ipsla"}[1m]) >= 48 
    for: 1m 
    labels: 
      severity: major 
      appType: net 
    annotations: 
      realvalue: "{{ $value }}"
      ruleId: 1 
      summary: "{{$labels.idc}}:{{$labels.device}}:{{$labels.host}}:{{$labels.ipSlaTag}}:fail" 
      

也可以通过curl推送规则到consul

curl -X PUT -d '''

- name: router_sla_fail 
  rules: 
  - alert: router_sla_fail 
    expr: sum_over_time(ipsla_status{device="router", host=~".+?", job="network_cisco_ipsla"}[1m]) >= 48 
    for: 1m 
    labels: 
      severity: minor
      appType: net 
    annotations: 
      realvalue: "{{ $value }}"
      ruleId: 2 
      summary: "{{$labels.idc}}:{{$labels.device}}:{{$labels.host}}:{{$labels.ipSlaTag}}:fail" 

''' http://localhost:8500/v1/kv/network/router_sla

五、安装consul-template,仅第一台安装

wget https://releases.hashicorp.com/consul-template/0.26.0/consul-template_0.26.0_linux_amd64.zip

unzip consul-template_0.26.0_linux_amd64.zip

mkdir /usr/local/consul_template

mv consul-template /usr/local/consul_template/

cd /usr/local/consul_template

mkdir conf

vim conf/config_rule.hcl

consul {
  address = "192.168.11.109:8500"

  retry {
    enabled = true
    attempts = 5
    backoff = "250ms"
  }
}

log_level = "info"

wait {
  min = "5s"
  max = "10s"
}

template = {
  source = "/usr/local/thanos_rule/conf/rule.tpl"
  destination = "/usr/local/thanos_rule/conf/rule.rules"
  command = "systemctl restart thanos_rule"
  command_timeout = "60s"

  wait {
    min = "2s"
    max = "10s"
  }
}

添加consul_template服务

vim /etc/systemd/system/consul_template_rule.service

[Unit]
Description = consul_template_rule
Wants = network-online.target
After = network-online.target

[Service]
User = root
Group = root
Restart = on-failure
ExecStart = /usr/local/consul_template/consul-template -config "/usr/local/consul_template/conf/config_rule.hcl"
LimitNOFILE = 65535

[Install]
WantedBy = multi-user.target

服务自启动

systemctl daemon-reload

systemctl enable consul_template_rule

systemctl start consul_template_rule

六、安装prometheus

wget https://github.com/prometheus/prometheus/releases/download/v2.29.0-rc.0/prometheus-2.29.0-rc.0.linux-amd64.tar.gz

tar -zxvf prometheus-2.29.0-rc.0.linux-amd64.tar.gz

mv prometheus-2.29.0-rc.0.linux-amd64 /usr/local/prometheus

cd /usr/local/prometheus

vim prometheus.yml

第一台:

global:
  scrape_interval: 60s
  scrape_timeout: 60s
  evaluation_interval: 1m
  external_labels:
    slave: '192.168.11.109:9090'
    idc: 'mydc'

scrape_configs:

  - job_name: 'snmp_5s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 5s
    scrape_timeout: 5s
    metrics_path: /snmp
    relabel_configs:
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    - source_labels: [__tmp_hash]
      regex: ^1$
      action: keep   
    - source_labels: [__address__]  
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 192.168.11.109:9116
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [job_name]
      regex: '(snmp_5s)'
      action: keep
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

  - job_name: 'snmp_10s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 10s
    scrape_timeout: 10s
    metrics_path: /snmp
    relabel_configs:
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    - source_labels: [__tmp_hash]
      regex: ^1$
      action: keep   
    - source_labels: [__address__]  
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 192.168.11.109:9116
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [job_name]
      regex: '(snmp_10s)'
      action: keep
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

  - job_name: 'snmp_60s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 60s
    scrape_timeout: 60s
    metrics_path: /snmp
    relabel_configs:
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    - source_labels: [__tmp_hash]
      regex: ^1$
      action: keep   
    - source_labels: [__address__]  
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 192.168.11.109:9116
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [job_name]
      regex: '(snmp_60s)'
      action: keep
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

  - job_name: 'black_5s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 5s
    scrape_timeout: 5s
    metrics_path: /probe
    relabel_configs:
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    - source_labels: [__tmp_hash]
      regex: ^1$
      action: keep   
    - source_labels: [__address__]
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 192.168.11.109:9115
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [job_name]
      regex: '(black_5s)'
      action: keep
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

  - job_name: 'black_10s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 10s
    scrape_timeout: 10s
    metrics_path: /probe
    relabel_configs:
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    - source_labels: [__tmp_hash]
      regex: ^1$
      action: keep   
    - source_labels: [__address__]
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 192.168.11.109:9115
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [job_name]
      regex: '(black_10s)'
      action: keep
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

第二台:

global:
  scrape_interval: 60s
  scrape_timeout: 60s
  evaluation_interval: 1m
  external_labels:
    slave: '192.168.11.121:9090'
    idc: 'mydc'

scrape_configs:

  - job_name: 'snmp_5s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 5s
    scrape_timeout: 5s
    metrics_path: /snmp
    relabel_configs:
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    - source_labels: [__tmp_hash]
      regex: ^0$
      action: keep
    - source_labels: [__address__]
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 192.168.11.121:9116
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [job_name]
      regex: '(snmp_5s)'
      action: keep
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

  - job_name: 'snmp_10s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 10s
    scrape_timeout: 10s
    metrics_path: /snmp
    relabel_configs:
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    - source_labels: [__tmp_hash]
      regex: ^0$
      action: keep
    - source_labels: [__address__]
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 192.168.11.121:9116
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [job_name]
      regex: '(snmp_10s)'
      action: keep
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

  - job_name: 'snmp_60s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 60s
    scrape_timeout: 60s
    metrics_path: /snmp
    relabel_configs:
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    - source_labels: [__tmp_hash]
      regex: ^0$
      action: keep
    - source_labels: [__address__]
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 192.168.11.121:9116
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [job_name]
      regex: '(snmp_60s)'
      action: keep
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

  - job_name: 'black_5s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 5s
    scrape_timeout: 5s
    metrics_path: /probe
    relabel_configs:
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    - source_labels: [__tmp_hash]
      regex: ^0$
      action: keep
    - source_labels: [__address__]
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 192.168.11.121:9115
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [job_name]
      regex: '(black_5s)'
      action: keep
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

  # Probe job on a 10 s cadence. Targets are discovered from the Consul catalog;
  # only services whose 4th tag resolves to job_name=black_10s survive relabelling.
  - job_name: 'black_10s'
    consul_sd_configs:
      - server: 192.168.11.109:8500
        datacenter: mydc
    scrape_interval: 10s
    scrape_timeout: 10s
    metrics_path: /probe
    relabel_configs:
    # Shard targets between the Prometheus servers: hash the discovered address
    # into bucket 0 or 1.
    - source_labels: [__address__]
      modulus: 2
      target_label: __tmp_hash
      action: hashmod
    # hashmod with modulus 2 can only produce 0 or 1; matching on 2 can never
    # succeed and would drop every target. This instance keeps bucket 1.
    # NOTE(review): the peer Prometheus must keep the complementary bucket (0) - confirm.
    - source_labels: [__tmp_hash]
      regex: ^1$
      action: keep
    # Strip the port: the bare IPv4 address becomes the ?target= probe parameter
    # and the instance label.
    - source_labels: [__address__]
      regex: '(\d+\.\d+\.\d+\.\d+):.*'
      replacement: $1
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    # The scrape itself is sent to the exporter at 192.168.11.121:9115.
    - target_label: __address__
      replacement: 192.168.11.121:9115
    - source_labels: [__meta_consul_address]
      target_label: ip
    - source_labels: [__meta_consul_node]
      target_label: host
    # Consul tags are addressed by position. {3} skips three tags, so the 4th
    # "key=value" tag is turned into a label (job_name for the registrations
    # shown in this guide).
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){3}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    # Keep only the services that asked to be scraped by this job.
    - source_labels: [job_name]
      regex: '(black_10s)'
      action: keep
    # Tags at positions 0, 1 and 2 become labels as well.
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){0}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){1}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    - source_labels: [__meta_consul_tags]
      regex: ',(?:[^,]+,){2}([^=]+)=([^,]+),.*'
      replacement: '${2}'
      target_label: '${1}'
    # The Consul service name doubles as the job label and as the ?module=
    # parameter sent to the exporter.
    - source_labels: [__meta_consul_service]
      target_label: job
    - source_labels: [__meta_consul_service]
      target_label: __param_module

添加systemctl服务,存储数据7天

vim /etc/systemd/system/prometheus.service

# systemd unit for the Prometheus server installed under /usr/local/prometheus.
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Restart=on-failure
# Validate the configuration first so a broken prometheus.yml aborts the start.
ExecStartPre=/usr/local/prometheus/promtool check config /usr/local/prometheus/prometheus.yml
# --web.enable-lifecycle enables the /-/reload endpoint used by ExecReload.
# --storage.tsdb.retention.time keeps 7 days of data; it replaces the
# deprecated --storage.tsdb.retention spelling.
# NOTE(review): requires a Prometheus release that knows retention.time - confirm the installed version.
ExecStart=/usr/local/prometheus/prometheus \
    --config.file=/usr/local/prometheus/prometheus.yml \
    --storage.tsdb.path=/usr/local/prometheus/data \
    --storage.tsdb.retention.time=7d \
    --web.enable-lifecycle
ExecReload=/usr/bin/curl -X POST localhost:9090/-/reload

# Without an [Install] section `systemctl enable prometheus` has nothing to
# link, so the service would not come up at boot.
[Install]
WantedBy=multi-user.target

服务自启动

systemctl daemon-reload

systemctl enable prometheus

systemctl start prometheus

七、安装thanos

# Download the Thanos 0.22.0-rc.0 release tarball, unpack it, and move the
# extracted directory to /usr/local/thanos.
wget https://github.com/thanos-io/thanos/releases/download/v0.22.0-rc.0/thanos-0.22.0-rc.0.linux-amd64.tar.gz
tar -xzvf thanos-0.22.0-rc.0.linux-amd64.tar.gz
mv thanos-0.22.0-rc.0.linux-amd64 /usr/local/thanos

添加thanos sidecar服务(两台)

vim /etc/systemd/system/thanos_sidecar.service

第一台:

# Thanos sidecar for the Prometheus instance on 192.168.11.109.
# It talks to Prometheus at :9090, reads the TSDB directory, and listens for
# gRPC on :10901 and HTTP on :10902.
[Unit]
Description=thanos_sidecar

[Service]
User=root
Type=simple
ExecStart=/usr/local/thanos/thanos sidecar \
    --log.level=info \
    --grpc-address=192.168.11.109:10901 \
    --http-address=192.168.11.109:10902 \
    --prometheus.url=http://192.168.11.109:9090 \
    --tsdb.path=/usr/local/prometheus/data

[Install]
WantedBy=multi-user.target

第二台:

# Thanos sidecar for the Prometheus instance on 192.168.11.121.
# It talks to Prometheus at :9090, reads the TSDB directory, and listens for
# gRPC on :10901 and HTTP on :10902.
[Unit]
Description=thanos_sidecar

[Service]
User=root
Type=simple
ExecStart=/usr/local/thanos/thanos sidecar \
    --log.level=info \
    --grpc-address=192.168.11.121:10901 \
    --http-address=192.168.11.121:10902 \
    --prometheus.url=http://192.168.11.121:9090 \
    --tsdb.path=/usr/local/prometheus/data

[Install]
WantedBy=multi-user.target

启动服务

systemctl daemon-reload

systemctl enable thanos_sidecar

systemctl start thanos_sidecar

添加thanos query服务(第一台)

vim /etc/systemd/system/thanos_query.service

# Thanos query front end on 192.168.11.109.
# It fans out to the two sidecar gRPC endpoints (:10901 on .109 and .121),
# serves its own gRPC on :20901 and HTTP on port 80.
[Unit]
Description=thanos_query

[Service]
User=root
Type=simple
ExecStart=/usr/local/thanos/thanos query \
    --query.auto-downsampling \
    --store=192.168.11.109:10901 \
    --store=192.168.11.121:10901 \
    --grpc-address=192.168.11.109:20901 \
    --http-address=192.168.11.109:80

[Install]
WantedBy=multi-user.target

启动服务

systemctl daemon-reload

systemctl enable thanos_query

systemctl start thanos_query

添加thanos rule服务(第一台)

# Create the Thanos Rule working tree. conf/ and data/ have to live inside
# /usr/local/thanos_rule: thanos_rule.service loads
# /usr/local/thanos_rule/conf/*.rules and stores state in
# /usr/local/thanos_rule/data. Creating them from /usr/local/ would put them
# in the wrong place.
mkdir -p /usr/local/thanos_rule/{conf,data}

# The relative path edited next (conf/rule.tpl) is resolved from this directory.
cd /usr/local/thanos_rule

vim conf/rule.tpl

{{- /* Emits a "groups:" document followed by the .Value of every entry returned by ls "rules"; presumably these are Consul KV entries holding rule groups - TODO confirm. */ -}}
groups:
{{ range ls "rules"}}
{{ .Value}}
{{ end }}

vim /etc/systemd/system/thanos_rule.service

# systemd unit for the Thanos ruler on 192.168.11.109.
# The Description previously said "consul_template", a copy-paste leftover
# that mislabels this service in systemctl/journal output.
[Unit]
Description=thanos_rule
Wants=network-online.target
After=network-online.target

[Service]
User=root
Group=root
Restart=on-failure
# Rules are read from /usr/local/thanos_rule/conf/*.rules and evaluated every
# 60s against the query endpoint at 192.168.11.109:80; alerts are sent to the
# Alertmanager at 192.168.11.109:9093.
ExecStart=/usr/local/thanos/thanos rule \
    --data-dir "/usr/local/thanos_rule/data" \
    --rule-file "/usr/local/thanos_rule/conf/*.rules" \
    --alert.query-url "http://192.168.11.109" \
    --query "192.168.11.109:80" \
    --alertmanagers.url "http://192.168.11.109:9093" \
    --log.level "info" \
    --grpc-address "192.168.11.109:10911" \
    --http-address "192.168.11.109:10912" \
    --eval-interval "60s"
LimitNOFILE=65535

[Install]
WantedBy=multi-user.target

启动服务

systemctl daemon-reload

systemctl enable thanos_rule

systemctl start thanos_rule

八、思科路由器配置sla和snmp,192.168.11.134是另一台pc

sla:

! IP SLA operation 1: ICMP echo to 192.168.11.134 sourced from 192.168.11.106,
! tagged "sla1", frequency 5 (presumably seconds - confirm against the IOS
! release in use), scheduled to start immediately and run forever.
ip sla 1
icmp-echo 192.168.11.134 source-ip 192.168.11.106
tag sla1
frequency 5
ip sla schedule 1 life forever start-time now

snmp:

! Read-only SNMP community named "linux".
! NOTE(review): presumably this must match the community configured for snmp_exporter - confirm.
snmp-server community linux RO

九、添加设备和需要启用的监控模板到consul

# Register node RT4 (192.168.11.106) in the Consul catalog once per monitoring
# template; the service name selects the template.
# Tag order matters: the Prometheus relabel rules in this guide pick tags by
# position, and the 4th tag (job_name=...) decides which scrape job keeps the
# target.
# NOTE(review): the snmp_60s / snmp_5s jobs are presumably defined elsewhere in prometheus.yml - confirm.
curl --request PUT --data '{"node": "RT4", "address": "192.168.11.106","service": { "service": "network_if_table", "tags": ["idc=mydc","device=router","owner=zc","job_name=snmp_60s"]}}' http://192.168.11.109:8500/v1/catalog/register

curl --request PUT --data '{"node": "RT4", "address": "192.168.11.106","service": { "service": "network_cisco_ipsla", "tags": ["idc=mydc","device=router","owner=zc","job_name=snmp_5s"]}}' http://192.168.11.109:8500/v1/catalog/register

curl --request PUT --data '{"node": "RT4", "address": "192.168.11.106","service": { "service": "network_cisco_process", "tags": ["idc=mydc","device=router","owner=zc","job_name=snmp_5s"]}}' http://192.168.11.109:8500/v1/catalog/register

curl --request PUT --data '{"node": "RT4", "address": "192.168.11.106","service": { "service": "network_ping", "tags": ["idc=mydc","device=router","owner=zc","job_name=black_5s"]}}' http://192.168.11.109:8500/v1/catalog/register

如果要删除设备node

# Deregister node RT4 from the Consul catalog via /v1/catalog/deregister.
curl --request PUT --data '{ "node": "RT4", "address": "192.168.11.106"}' http://192.168.11.109:8500/v1/catalog/deregister

十、安装grafana

# Download and install the Grafana Enterprise 6.2.5 RPM, then enable and start
# the grafana-server service.
# NOTE(review): the dashboard JSON in this guide records pluginVersion 7.2.1;
# confirm that a 6.2.5 install imports it as expected.
wget https://dl.grafana.com/enterprise/release/grafana-enterprise-6.2.5-1.x86_64.rpm
yum install grafana-enterprise-6.2.5-1.x86_64.rpm

systemctl enable grafana-server

systemctl start grafana-server

http://192.168.11.109:3000/

添加数据源

prometheus方案1.png

添加Dashboard,在json model中导入下面代码

{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      },
      {
        "builtln": 1,
        "datasource": "-- Grafana --",
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "editable": true,
  "gnetId": null,
  "graphTooltip": 0,
  "id": 3,
  "iteration": 1659508685852,
  "links": [],
  "panels": [
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "mydc",
      "fieldConfig": {
        "defaults": {
          "custom": {}
        },
        "overrides": []
      },
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 24,
        "x": 0,
        "y": 0
      },
      "hiddenSeries": false,
      "id": 1,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.2.1",
      "pointradius": 1,
      "points": true,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "irate(ifHCInOctets{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])*8",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-inBound",
          "refId": "A",
          "refld": "A"
        },
        {
          "expr": "irate(ifHCOutOctets{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])*8",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-outBound",
          "refId": "B",
          "refld": "B"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Port Traffic",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "bps",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": "0",
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "mydc",
      "fieldConfig": {
        "defaults": {
          "custom": {}
        },
        "overrides": []
      },
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 24,
        "x": 0,
        "y": 7
      },
      "hiddenSeries": false,
      "id": 6,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.2.1",
      "pointradius": 1,
      "points": true,
      "renderer": "flot",
      "repeat": "SWITCHPORT",
      "repeatDirection": "h",
      "scopedVars": {
        "SWITCHPORT": {
          "selected": true,
          "text": "Et0/0",
          "value": "Et0/0"
        }
      },
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "irate(ifHCInUcastPkts{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])",
          "format": "time_series",
          "instant": false,
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-UcastIN",
          "refId": "A",
          "refld": "A"
        },
        {
          "expr": "irate(ifHCOutUcastPkts{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-UcastOUT",
          "refId": "B",
          "refld": "B"
        },
        {
          "expr": "irate(ifInMulticastPkts{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-MulticastIN",
          "refId": "C",
          "refld": "C"
        },
        {
          "expr": "irate(ifOutMulticastPkts{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-MulticastOUT",
          "refId": "D",
          "refld": "D"
        },
        {
          "expr": "irate(ifInBroadcastPkts{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-BroadcastIN",
          "refId": "E",
          "refld": "E"
        },
        {
          "expr": "irate(ifOutBroadcastPkts{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[15m])",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-BroadcastOUT",
          "refId": "F",
          "refld": "F"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Port Packets",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "pps",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "mydc",
      "fieldConfig": {
        "defaults": {
          "custom": {}
        },
        "overrides": []
      },
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 7,
        "w": 24,
        "x": 0,
        "y": 15
      },
      "hiddenSeries": false,
      "id": 2,
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.2.1",
      "pointradius": 1,
      "points": true,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "irate(ifInDiscards{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-DiscardsIN",
          "refId": "A",
          "refld": "A"
        },
        {
          "expr": "irate(ifOutDiscards{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])",
          "format": "time_series",
          "instant": false,
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-DiscardOUT",
          "refId": "B",
          "refld": "B"
        },
        {
          "expr": "irate(ifInErrors{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-ErrorsIN",
          "refId": "C",
          "refld": "C"
        },
        {
          "expr": "irate(ifOutErrors{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}[5m])",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 2,
          "legendFormat": "{{host}}-{{ifName}}-ErrorsOUT",
          "refId": "D",
          "refld": "D"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Port Errors",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "pps",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": "0",
          "show": true
        },
        {
          "format": "short",
          "label": null,
          "logBase": 1,
          "max": null,
          "min": null,
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "mydc",
      "fieldConfig": {
        "defaults": {
          "custom": {}
        },
        "overrides": []
      },
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 9,
        "w": 24,
        "x": 0,
        "y": 22
      },
      "hiddenSeries": false,
      "id": 8,
      "interval": "",
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.2.1",
      "pointradius": 1,
      "points": true,
      "renderer": "flot",
      "seriesOverrides": [
        {
          "alias": "ifOperStatus",
          "yaxis": 2
        },
        {
          "alias": "ifOperStatus: 1 up 2 down 3 testing",
          "yaxis": 2
        },
        {
          "alias": "ifStatus:1 up / 0,2 down",
          "yaxis": 2
        }
      ],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "ifHighSpeed{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}",
          "formal": "time_series",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 1,
          "legendFormat": "ifAlias: {{ifAlias}}",
          "refId": "A",
          "refld": "A"
        },
        {
          "expr": "ifOperStatus{host=\"$HOST\",ifName=\"$SWITCHPORT\",job=\"network_if_table\"}",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 1,
          "legendFormat": "ifStatus:1 up / 0,2 down",
          "refId": "B",
          "refld": "B"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "Port Speed / Port Status",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "decimals": 3,
          "format": "Mbits",
          "label": "Speed",
          "logBase": 1,
          "max": null,
          "min": "0",
          "show": true
        },
        {
          "decimals": 0,
          "format": "short",
          "label": "ifStatus",
          "logBase": 1,
          "max": "5",
          "min": "0",
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "mydc",
      "fieldConfig": {
        "defaults": {
          "custom": {}
        },
        "overrides": []
      },
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 24,
        "x": 0,
        "y": 31
      },
      "hiddenSeries": false,
      "id": 10,
      "interval": "5s",
      "legend": {
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "show": true,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.2.1",
      "pointradius": 1,
      "points": true,
      "renderer": "flot",
      "seriesOverrides": [
        {
          "alias": "ipSlaStatus: 1 up / 0,4 down",
          "yaxis": 2
        }
      ],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "ipsla_rtt{host=\"$HOST\",ipSlaTag=\"$SLA\"}",
          "format": "time_series",
          "interval": "5s",
          "intervalFactor": 1,
          "legendFormat": "ipSlaRtt: {{ipSlaTag}}",
          "refId": "A",
          "refld": "A"
        },
        {
          "expr": "ipsla_status{host=\"$HOST\",ipSlaTag =\"$SLA\"}",
          "format": "time_series",
          "interval": "5s",
          "intervalFactor": 1,
          "legendFormat": "ipSlaStatus: 1 up / 0,4 down",
          "refId": "B",
          "refld": "B"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "ipSlaRtt / ipSlaStatus",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "format": "ms",
          "label": "ipSlaRtt",
          "logBase": 1,
          "max": null,
          "min": "0",
          "show": true
        },
        {
          "decimals": 0,
          "format": "short",
          "label": "ipSlaStatus",
          "logBase": 1,
          "max": "5",
          "min": "0",
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "mydc",
      "fieldConfig": {
        "defaults": {
          "custom": {}
        },
        "overrides": []
      },
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 39
      },
      "hiddenSeries": false,
      "id": 4,
      "legend": {
        "alignAsTable": false,
        "avg": false,
        "current": true,
        "max": false,
        "min": false,
        "rightside": false,
        "show": true,
        "sort": "current",
        "sortDesc": false,
        "total": false,
        "values": true
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.2.1",
      "pointradius": 1,
      "points": true,
      "renderer": "flot",
      "seriesOverrides": [],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "CpuUsage1min{PhysicalName=~\"Board.*|RPU *\",host=~\"$HOST\",job=\"network_router_h3c\"} or CpuUsage1min{host=~\"$HOST\",job=\"network_cisco_process\"}",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 1,
          "legendFormat": "{{host}}: CpuUsage",
          "refId": "A",
          "refld": "A"
        },
        {
          "expr": "ProcessorUsedMem{host=~\"$HOST\",job=\"network_cisco_process\"} / (ProcessorUsedMem{host=~\"$HOST\",job=\"network_cisco_process\"} + ProcessorFreeMem{host=~\"$HOST\",job=\"network_cisco_process\"}) * 100",
          "format": "time_series",
          "interval": "60s",
          "intervalFactor": 1,
          "legendFormat": "{{host}}: ProcessorMemUsage",
          "refId": "O"
        },
        {
          "expr": "IOUsedMem{host=~\"$HOST\",job=\"network_cisco_process\"} / (IOUsedMem{host=~\"$HOST\",job=\"network_cisco_process\"} + IOFreeMem{host=~\"$HOST\",job=\"network_cisco_process\"}) * 100",
          "format": "time_series",
          "intervalFactor": 1,
          "legendFormat": "{{host}}: IOMemUsage",
          "refId": "P"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "CPU/MEM/Session",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "decimals": 0,
          "format": "percent",
          "label": "CPU / MEM",
          "logBase": 1,
          "max": "100",
          "min": "0",
          "show": true
        },
        {
          "format": "none",
          "label": "SESSION",
          "logBase": 1,
          "max": null,
          "min": "0",
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    },
    {
      "aliasColors": {},
      "bars": false,
      "dashLength": 10,
      "dashes": false,
      "datasource": "mydc",
      "fieldConfig": {
        "defaults": {
          "custom": {}
        },
        "overrides": []
      },
      "fill": 0,
      "fillGradient": 0,
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 39
      },
      "hiddenSeries": false,
      "id": 11,
      "interval": "5s",
      "legend": {
        "alignAsTable": false,
        "avg": false,
        "current": false,
        "max": false,
        "min": false,
        "rightSide": false,
        "show": true,
        "sort": "current",
        "sortDesc": false,
        "total": false,
        "values": false
      },
      "lines": true,
      "linewidth": 1,
      "links": [],
      "nullPointMode": "null",
      "options": {
        "alertThreshold": true
      },
      "percentage": false,
      "pluginVersion": "7.2.1",
      "pointradius": 1,
      "points": true,
      "renderer": "flot",
      "seriesOverrides": [
        {
          "alias": "pingStatus: 1 up / 0 down",
          "yaxis": 2
        }
      ],
      "spaceLength": 10,
      "stack": false,
      "steppedLine": false,
      "targets": [
        {
          "expr": "probe_success{host=\"$HOST\",job=\"network_ping\"}",
          "format": "time_series",
          "interval": "5s",
          "intervalFactor": 1,
          "legendFormat": "pingStatus: 1 up / 0 down",
          "refId": "A",
          "refld": "A"
        },
        {
          "expr": "probe_icmp_duration_seconds{host=\"$HOST\",job=\"network_ping\",phase=\"rtt\"}",
          "format": "time_series",
          "interval": "5s",
          "intervalFactor": 1,
          "legendFormat": "pingRtt",
          "refId": "B",
          "refld": "B"
        }
      ],
      "thresholds": [],
      "timeFrom": null,
      "timeRegions": [],
      "timeShift": null,
      "title": "pingRtt / pingStatus",
      "tooltip": {
        "shared": true,
        "sort": 0,
        "value_type": "individual"
      },
      "type": "graph",
      "xaxis": {
        "buckets": null,
        "mode": "time",
        "name": null,
        "show": true,
        "values": []
      },
      "yaxes": [
        {
          "decimals": null,
          "format": "s",
          "label": "pingRtt",
          "logBase": 1,
          "max": null,
          "min": "0",
          "show": true
        },
        {
          "decimals": 0,
          "format": "short",
          "label": "pingStatus",
          "logBase": 1,
          "max": "5",
          "min": "0",
          "show": true
        }
      ],
      "yaxis": {
        "align": false,
        "alignLevel": null
      }
    }
  ],
  "refresh": false,
  "schemaVersion": 18,
  "style": "dark",
  "tags": [],
  "templating": {
    "list": [
      {
        "allValue": null,
        "current": {
          "selected": true,
          "text": "RT4",
          "value": "RT4"
        },
        "datasource": "mydc",
        "definition": "up{owner='zc'}",
        "hide": 0,
        "includeAll": false,
        "label": null,
        "multi": true,
        "name": "HOST",
        "options": [],
        "query": "up{owner='zc'}",
        "refresh": 1,
        "regex": "/host=\"(.*?)\"/",
        "skipUrlSync": false,
        "sort": 0,
        "tagValuesQuery": "",
        "tags": [],
        "tagsQuery": "",
        "type": "query",
        "useTags": false
      },
      {
        "allValue": null,
        "current": {
          "selected": true,
          "text": "Et0/0",
          "value": "Et0/0"
        },
        "datasource": "mydc",
        "definition": "ifHighSpeed{host=~\"$HOST\",owner=\"zc\"}",
        "hide": 0,
        "includeAll": false,
        "label": null,
        "multi": true,
        "name": "SWITCHPORT",
        "options": [],
        "query": "ifHighSpeed{host=~\"$HOST\",owner=\"zc\"}",
        "refresh": 1,
        "regex": "/ifName=\"(.*?)\"/",
        "skipUrlSync": false,
        "sort": 0,
        "tagValuesQuery": "",
        "tags": [],
        "tagsQuery": "",
        "type": "query",
        "useTags": false
      },
      {
        "allValue": null,
        "current": {
          "selected": true,
          "text": "sla1",
          "value": "sla1"
        },
        "datasource": "mydc",
        "definition": "ipSlaTag{host=~\"$HOST\",owner=\"zc\"}",
        "hide": 0,
        "includeAll": false,
        "label": null,
        "multi": true,
        "name": "SLA",
        "options": [],
        "query": "ipSlaTag{host=~\"$HOST\",owner=\"zc\"}",
        "refresh": 1,
        "regex": "/ipSlaTag=\"(.*?)\"/",
        "skipUrlSync": false,
        "sort": 0,
        "tagValuesQuery": "",
        "tags": [],
        "tagsQuery": "",
        "type": "query",
        "useTags": false
      }
    ]
  },
  "time": {
    "from": "now-2h",
    "to": "now"
  },
  "timepicker": {
    "refresh_intervals": [
      "5s",
      "10s",
      "30s",
      "1m",
      "5m",
      "15m",
      "30m",
      "1h",
      "2h",
      "1d"
    ],
    "time_options": [
      "5m",
      "15m",
      "1h",
      "6h",
      "12h",
      "24h",
      "2d",
      "7d",
      "30d"
    ]
  },
  "timezone": "",
  "title": "mydc",
  "uid": "GmRQdHkmz",
  "version": 59
}

十一、安装alertmanager

# Download the Alertmanager 0.21.0 release tarball, unpack it, and move the
# extracted directory to /usr/local/alertmanager.
wget https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz
tar -xzvf alertmanager-0.21.0.linux-amd64.tar.gz
mv alertmanager-0.21.0.linux-amd64 /usr/local/alertmanager

# Create the data/ and template/ sub-directories inside the install directory.
cd /usr/local/alertmanager
mkdir -p data template

vim alertmanager.yml

# Alertmanager configuration: e-mail delivery through smtp.qq.com, routing by
# the `device` label, and severity-based inhibition per instance.
global:
  resolve_timeout: 5m
  smtp_smarthost: smtp.qq.com:25
  smtp_from: 2********@qq.com
  smtp_auth_username: 2********
  smtp_auth_password: 1111111111
  smtp_require_tls: false

# Custom notification templates.
templates:
  - /usr/local/alertmanager/template/alertmanager.tmpl

# Alerts are grouped by instance. Anything not matched by a child route falls
# through to the all.mail receiver.
route:
  receiver: all.mail
  group_by:
    - instance
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 30m

  routes:
    # device=router -> router.mail
    - match:
        device: router
      receiver: router.mail
      group_wait: 10s
    # device=switch -> switch.mail
    - match:
        device: switch
      receiver: switch.mail
      group_wait: 10s

receivers:
  - name: router.mail
    email_configs:
      - to: 2********@qq.com
        send_resolved: true

  - name: switch.mail
    email_configs:
      - to: '2********@163.com,e***********@sina.com'
        send_resolved: true

  - name: all.mail
    email_configs:
      - to: e***********@sina.com
        send_resolved: true

# For the same instance, a higher severity silences the lower ones:
# critical mutes major and minor, major mutes minor.
inhibit_rules:
  - source_match:
      severity: critical
    target_match:
      severity: major
    equal:
      - instance
  - source_match:
      severity: critical
    target_match:
      severity: minor
    equal:
      - instance
  - source_match:
      severity: major
    target_match:
      severity: minor
    equal:
      - instance

vim template/alertmanager.tmpl

{{ define "__text_alert_list" }}
{{ range .  }}
告警名称:{{ .Labels.alertname }}
<br>
告警级别:{{ .Labels.severity }}
<br>
主机地址: {{ .Labels.instance }}
<br>
主机名称: {{ .Labels.host }}
<br>
告警描述: {{ .Annotations.summary }}
<br>
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }}
<br>
-----------
<br>
{{ end }}
{{ end }}


{{ define "__text_resolve_list" }}
{{ range .  }}
告警名称:{{ .Labels.alertname }}
<br>
告警级别:{{ .Labels.severity }}
<br>
主机地址: {{ .Labels.instance }}
<br>
主机名称: {{ .Labels.host }}
<br>
告警描述: {{ .Annotations.summary }}
<br>
触发时间: {{ .StartsAt.Format "2006-01-02 15:04:05" }}
<br>
恢复时间: {{ .EndsAt.Format "2006-01-02 15:04:05" }}
<br>
-----------
<br>
{{ end }}
{{ end }}



{{ define "email.default.html" }}
{{ if gt (len .Alerts.Firing) 0 -}}
告警列表:
<br>
------------------------
<br>
{{ template "__text_alert_list" .Alerts.Firing }}
<br>
<br>
{{- end }}
{{ if gt (len .Alerts.Resolved) 0 -}}
恢复列表:
<br>
------------------------
<br>
{{ template "__text_resolve_list" .Alerts.Resolved }}
<br>
<br>
{{- end }}
{{- end }}

添加服务

vim /etc/systemd/system/alertmanager.service

[Unit]
Description=alertmanager
After=network.target
#StartLimitIntervalSec=0

[Service]
Type=simple
User=root
ExecStart=/usr/local/alertmanager/alertmanager --storage.path="/usr/local/alertmanager/data/" --config.file=/usr/local/alertmanager/alertmanager.yml --web.external-url=http://192.168.11.109:9093
Restart=always
RestartSec=1
# Restart=on-failure

[Install]
WantedBy=multi-user.target

启动服务并设置开机自启

systemctl daemon-reload

systemctl enable alertmanager

systemctl start alertmanager

告警邮件示例:

告警列表: 
------------------------ 
告警名称:router_sla_fail 
告警级别:minor 
主机地址: 192.168.11.106 
主机名称: RT4 
告警描述: mydc:router:RT4:sla2:fail 
触发时间: 2022-08-03 07:44:34 
----------- 

恢复邮件示例:

恢复列表: 
------------------------ 
告警名称:router_sla_fail 
告警级别:minor 
主机地址: 192.168.11.106 
主机名称: RT4 
告警描述: mydc:router:RT4:sla2:fail 
触发时间: 2022-08-03 07:44:34 
恢复时间: 2022-08-03 08:00:34 
----------- 

thanos rule 发送给 alertmanager 的告警通知报文(需要抓包才能看到),示例如下:

{
	"labels": {
		"alertname": "ip_sla_fail",
		"appType": "net",
		"device": "router",
		"host": "RT4",
		"idc": "mydc",
		"instance": "192.168.11.106",
		"ip": "192.168.11.106",
		"ipSlaIndex": "1",
		"ipSlaTag": "sla1",
		"job": "network_cisco_ipsla",
		"severity": "minor",
		"slave": "192.168.11.109:9090"
	},
	"annotations": {
		"realvalue": "12",
		"ruleId": "73",
		"summary": "mydc:router:RT4:sla1:fail"
	},
	"startsAt": "2021-07-23T02:48:47.924941435Z",
	"endsAt": "2021-07-23T04:06:47.924941435Z",
	"generatorURL": "192.168.11.109/graph?g0.expr=sum_over_time%28ipsla_status%7Bdevice%3D%22router%22%2Chost%3D~%22.%2B%3F%22%2Cjob%3D%22network_cisco_ipsla%22%7D%5B1m%5D%29+%3C%3D+48\u0026g0.tab=1"
}

标签:__,target,草稿,consul,labels,label,source,prometheus,监控
来源: https://www.cnblogs.com/choujin/p/16552809.html