其他分享
首页 > 其他分享 > Prometheus二进制监控

Prometheus二进制监控

作者:互联网

1.安装prometheus 访问ip:9090
# 下载安装包
[root@master01 ~]# wget https://github.com/prometheus/prometheus/releases/download/v2.37.0/prometheus-2.37.0.linux-amd64.tar.gz
# 解压并改名
[root@master01 ~]# tar -zxvf prometheus-2.37.0.linux-amd64.tar.gz -C /usr/local/
[root@master01 ~]# mv /usr/local/prometheus-2.37.0.linux-amd64 /usr/local/prometheus
# 配置systemd管理
# 创建数据存储目录
[root@master01 ~]# mkdir -p /data/prometheus
[root@ecs-jiankong prom]# cat /etc/systemd/system/prometheus.service
[Unit]
Description=Prometheus
Documentation=https://prometheus.io/
After=network.target
[Service]
# Type设置为notify时,服务会不断重启
Type=simple
User=root
# --storage.tsdb.path是可选项,默认数据目录在运行目录的./data目录中
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --storage.tsdb.path=/data/prometheus
Restart=on-failure
[Install]
WantedBy=multi-user.target
# 启动服务
[root@master01 ~]# systemctl daemon-reload && systemctl enable --now prometheus && systemctl status  prometheus

2.安装grafana 访问:ip:3000
# 下载安装包
[root@master01 ~]# wget https://dl.grafana.com/oss/release/grafana-7.0.4-1.x86_64.rpm
# 安装并启动
[root@master01 ~]# yum -y localinstall grafana-7.0.4-1.x86_64.rpm
[root@master01 ~]# systemctl enable --now grafana-server
[root@master01 ~]# systemctl status grafana-server
# 查看grafana相关目录
[root@master01 ~]# find / -iname grafana
/etc/grafana   #配置文件目录
/var/lib/grafana  #数据目录
/var/log/grafana #日志目录
/run/grafana #管理进程目录
/data/backup/grafana
/usr/share/grafana # 工作目录
/usr/share/grafana/public/app/plugins/datasource/grafana
# 查看grafana配置文件
[root@master01 ~]# rpm -qc grafana
/etc/init.d/grafana-server
/etc/sysconfig/grafana-server
/usr/lib/systemd/system/grafana-server.service
# 查看&修改数据目录
[root@master01 ~]# cat /etc/sysconfig/grafana-server
GRAFANA_USER=grafana

GRAFANA_GROUP=grafana

GRAFANA_HOME=/usr/share/grafana

LOG_DIR=/var/log/grafana

DATA_DIR=/var/lib/grafana

MAX_OPEN_FILES=10000

CONF_DIR=/etc/grafana

CONF_FILE=/etc/grafana/grafana.ini

RESTART_ON_UPGRADE=true

PLUGINS_DIR=/var/lib/grafana/plugins

PROVISIONING_CFG_DIR=/etc/grafana/provisioning
####################注:自定义存储目录需要授权grafana权限,或者启动使用root用户########

3.安装alertmanager 访问:ip:9093
# 下载安装包
[root@master01 ~]# wget https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-amd64.tar.gz
# 解压&改名
[root@master01 ~]# tar xf alertmanager-0.24.0.linux-amd64.tar.gz  -C /usr/local
[root@master01 ~]# mv /usr/local/alertmanager-0.24.0.linux-amd64/ /usr/local/alertmanager
# 配置systemd管理
[root@master01 ~]# cat  /usr/lib/systemd/system/alertmanager.service
[Unit]
Description=alertmanager System
Documentation=https://prometheus.io/docs/alerting/latest/alertmanager/
[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml
[Install]
WantedBy=multi-user.target
[root@master01 ~]#  systemctl daemon-reload && systemctl enable --now alertmanager
[root@master01 ~]#  systemctl status alertmanager

4.安装钉钉webhook
[root@master01 ~]# wget https://github.com/timonwong/prometheus-webhook-dingtalk/releases/download/v2.0.0/prometheus-webhook-dingtalk-2.0.0.linux-amd64.tar.gz
# 解压&&重命名
[root@master01 ~]# tar xf prometheus-webhook-dingtalk-2.0.0.linux-amd64.tar.gz -C /usr/local/
[root@master01 ~]# mv /usr/local/prometheus-webhook-dingtalk-2.0.0.linux-amd64 /usr/local/prometheus-webhook-dingtalk
# 配置告警模板
[root@master01  prometheus-webhook-dingtalk]# cat templates/webhook.tmpl 
{{- define "webhook.tmpl" }}
{{- range $i, $alert := .Alerts.Firing -}}
[报警项]:{{ index $alert.Labels "alertname" }}
[实例]:{{ index $alert.Labels "instance" }}
[job]:{{ index $alert.Labels "job" }}
[报警内容]:{{ index $alert.Annotations "summary" }}
[开始时间]:{{ $alert.StartsAt.Format "2006-01-02 15:04:05" }}
====================
{{- end }}
{{- end }}
# 配置钉钉告警文件 注:我这边配置的webhook1用的加签的方式 
[root@master01 prometheus-webhook-dingtalk]# cat config.example.yml 
····
## Targets, previously was known as "profiles"
templates:
  - templates/webhook.tmpl
targets:
  webhook1:
    url: https://oapi.dingtalk.com/robot/send?access_token=975b182b6f08161b761005a357413cf6fa0b3e35f76844fea837xxxx
    # secret for signature
    secret: SEC62f9defd6cca8d28c9e2342f917c0a7dbba56635ba51b3axxxx
# 配置systemd管理
[root@master01 prometheus-webhook-dingtalk]# cat /usr/lib/systemd/system/prometheus-webhook-dingtalk.service
[Unit]
Description=https://github.com/timonwong/prometheus-webhook-dingtalk/releases/
After=network-online.target
[Service]
Restart=on-failure
ExecStart=/usr/local/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk --config.file=/usr/local/prometheus-webhook-dingtalk/config.example.yml
 
[Install]
WantedBy=multi-user.target

[root@master01 ~]#  systemctl daemon-reload && systemctl enable --now prometheus-webhook-dingtalk
[root@master01 ~]#  systemctl status prometheus-webhook-dingtalk
# curl测试发信到钉钉,查看钉钉是否收到 但是由于格式问题,信息为空
[root@master01 prometheus-webhook-dingtalk]# curl 'http://localhost:8060/dingtalk/webhook1/send'    -H 'Content-Type: application/json'    -d '{"msgtype": "ding.link.text","text": {"ding.link.content": "'"test"'"}}'
OK

5.配置告警
[root@master01 alertmanager]# cat alertmanager.yml 
route:
  group_by: ['alertname']
  group_wait: 30s
  group_interval: 2m
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
  - name: 'web.hook'
    webhook_configs:
      - url: 'http://localhost:8060/dingtalk/webhook1/send'
        send_resolved: true
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
[root@master01 ~]#  systemctl restart alertmanager
[root@master01 ~]#  systemctl stop prometheus-webhook-dingtalk && systemctl start prometheus-webhook-dingtalk

6.安装node_exporter
# 下载安装包
[root@master01 ~]# wget https://github.com/prometheus/node_exporter/releases/download/v1.4.0-rc.0/node_exporter-1.4.0-rc.0.linux-amd64.tar.gz
# 解压重命名
[root@master01 ~]# tar xf node_exporter-1.4.0-rc.0.linux-amd64.tar.gz  -C /usr/local
[root@master01 ~]# mv /usr/local/node_exporter-1.4.0-rc.0.linux-amd64/ /usr/local/node_exporter
[root@master01 ~]# groupadd prometheus && useradd -g prometheus -s /sbin/nologin prometheus
# 配置systemd管理
[root@master01  node_exporter]# cat /etc/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/
After=network.target
[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/node_exporter/node_exporter
Restart=on-failure
[Install]
WantedBy=multi-user.target
# 启动服务
[root@master01 ~]#  systemctl daemon-reload && systemctl enable --now node_exporter
[root@master01 ~]#  systemctl status node_exporter

7.报警规则
[root@master01 prometheus]# cat rules/first_rules.yml 
groups:
    - name: 主机状态-监控告警
      rules:
      - alert: 主机状态
        expr: up{job="node_exporter"} == 0
        for: 1m
        labels:
          status: 非常严重
        annotations:
          summary: "{{$labels.instance}}:服务器宕机"
          description: "{{$labels.instance}}:服务器宕机超过1分钟"
          
[root@master01 prometheus]# cat prometheus.yml 
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets: ['localhost:9093']

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "/usr/local/prometheus/rules/first_rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ["localhost:9090"]
  - job_name: "node_exporter"
    static_configs:
      - targets: ["localhost:9100"]
  - job_name: "alertmanager"
    static_configs:
      - targets: ["localhost:9093"]
# 重启下Prometheus
[root@master01 ~]# systemctl restart prometheus

标签:二进制,master01,webhook,prometheus,grafana,Prometheus,usr,监控,root
来源: https://www.cnblogs.com/Applogize/p/16619222.html