Prometheus二进制监控
作者:互联网
1.安装prometheus 访问ip:9090
# 下载安装包
[root@master01 ~]# wget https://github.com/prometheus/prometheus/releases/download/v2.37.0/prometheus-2.37.0.linux-amd64.tar.gz
# 解压并改名
[root@master01 ~]# tar -zxvf prometheus-2.37.0.linux-amd64.tar.gz -C /usr/local/
[root@master01 ~]# mv /usr/local/prometheus-2.37.0.linux-amd64 /usr/local/prometheus
# 配置systemd管理
# 创建数据存储目录
[root@master01 ~]# mkdir -p /data/prometheus
[root@ecs-jiankong prom]# cat /etc/systemd/system/prometheus.service
[Unit]
Description=Prometheus
Documentation=https://prometheus.io/
After=network.target
[Service]
# Type设置为notify时,服务会不断重启
Type=simple
User=root
# --storage.tsdb.path是可选项,默认数据目录在运行目录的./data目录中
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --storage.tsdb.path=/data/prometheus
Restart=on-failure
[Install]
WantedBy=multi-user.target
# 启动服务
[root@master01 ~]# systemctl daemon-reload && systemctl enable --now prometheus && systemctl status prometheus
2.安装grafana 访问:ip:3000
# 下载安装包
[root@master01 ~]# wget https://dl.grafana.com/oss/release/grafana-7.0.4-1.x86_64.rpm
# 安装并启动
[root@master01 ~]# yum -y localinstall grafana-7.0.4-1.x86_64.rpm
[root@master01 ~]# systemctl enable --now grafana-server
[root@master01 ~]# systemctl status grafana-server
# 查看grafana相关目录
[root@master01 ~]# find / -iname grafana
/etc/grafana #配置文件目录
/var/lib/grafana #数据目录
/var/log/grafana #日志目录
/run/grafana #管理进程目录
/data/backup/grafana
/usr/share/grafana # 工作目录
/usr/share/grafana/public/app/plugins/datasource/grafana
# 查看grafana配置文件
[root@master01 ~]# rpm -qc grafana
/etc/init.d/grafana-server
/etc/sysconfig/grafana-server
/usr/lib/systemd/system/grafana-server.service
# 查看&修改数据目录
[root@master01 ~]# cat /etc/sysconfig/grafana-server
GRAFANA_USER=grafana
GRAFANA_GROUP=grafana
GRAFANA_HOME=/usr/share/grafana
LOG_DIR=/var/log/grafana
DATA_DIR=/var/lib/grafana
MAX_OPEN_FILES=10000
CONF_DIR=/etc/grafana
CONF_FILE=/etc/grafana/grafana.ini
RESTART_ON_UPGRADE=true
PLUGINS_DIR=/var/lib/grafana/plugins
PROVISIONING_CFG_DIR=/etc/grafana/provisioning
####################注:自定义存储目录需要授权grafana权限,或者启动使用root用户########
3.安装alertmanager 访问:ip:9093
# 下载安装包
[root@master01 ~]# wget https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-amd64.tar.gz
# 解压&改名
[root@master01 ~]# tar xf alertmanager-0.24.0.linux-amd64.tar.gz -C /usr/local
[root@master01 ~]# mv /usr/local/alertmanager-0.24.0.linux-amd64/ /usr/local/alertmanager
# 配置systemd管理
[root@master01 ~]# cat /usr/lib/systemd/system/alertmanager.service
[Unit]
Description=alertmanager System
Documentation=alertmanager System
[Service]
ExecStart=/usr/local/alertmanager/alertmanager --config.file=/usr/local/alertmanager/alertmanager.yml
[Install]
WantedBy=multi-user.target
[root@master01 ~]# systemctl daemon-reload && systemctl enable --now alertmanager
[root@master01 ~]# systemctl status alertmanager
4.安装钉钉webhook
[root@master01 ~]# wget https://github.com/timonwong/prometheus-webhook-dingtalk/releases/download/v2.0.0/prometheus-webhook-dingtalk-2.0.0.linux-amd64.tar.gz
# 解压&&重命名
[root@master01 ~]# tar xf prometheus-webhook-dingtalk-2.0.0.linux-amd64.tar.gz -C /usr/local/
[root@master01 ~]# mv /usr/local/prometheus-webhook-dingtalk-2.0.0.linux-amd64 /usr/local/prometheus-webhook-dingtalk
# 配置告警模板
[root@master01 prometheus-webhook-dingtalk]# cat templates/webhook.tmpl
{{- define "webhook.tmpl" }}
{{- range $i, $alert := .Alerts.Firing -}}
[报警项]:{{ index $alert.Labels "alertname" }}
[实例]:{{ index $alert.Labels "instance" }}
[job]:{{ index $alert.Labels "job" }}
[报警内容]:{{ index $alert.Annotations "summary" }}
[开始时间]:{{ $alert.StartsAt.Format "2006-01-02 15:04:05" }}
====================
{{- end }}
{{- end }}
# 配置钉钉告警文件 注:我这边配置的webhook1用的加签的方式
[root@master01 prometheus-webhook-dingtalk]# cat config.example.yml
····
## Targets, previously was known as "profiles"
templates:
- templates/webhook.tmpl
targets:
webhook1:
url: https://oapi.dingtalk.com/robot/send?access_token=975b182b6f08161b761005a357413cf6fa0b3e35f76844fea837xxxx
# secret for signature
secret: SEC62f9defd6cca8d28c9e2342f917c0a7dbba56635ba51b3axxxx
# 配置systemd管理
[root@master01 prometheus-webhook-dingtalk]# cat /usr/lib/systemd/system/prometheus-webhook-dingtalk.service
[Unit]
Description=https://github.com/timonwong/prometheus-webhook-dingtalk/releases/
After=network-online.target
[Service]
Restart=on-failure
ExecStart=/usr/local/prometheus-webhook-dingtalk/prometheus-webhook-dingtalk --config.file=/usr/local/prometheus-webhook-dingtalk/config.example.yml
[Install]
WantedBy=multi-user.target
[root@master01 ~]# systemctl daemon-reload && systemctl enable --now prometheus-webhook-dingtalk
[root@master01 ~]# systemctl status prometheus-webhook-dingtalk
# curl测试发信到钉钉,查看钉钉是否收到 但是由于格式问题,信息为空
[root@master01 prometheus-webhook-dingtalk]# curl 'http://localhost:8060/dingtalk/webhook1/send' -H 'Content-Type: application/json' -d '{"msgtype": "ding.link.text","text": {"ding.link.content": "'"test"'"}}'
OK
5.配置告警
[root@master01 alertmanager]# cat alertmanager.yml
route:
group_by: ['alertname']
group_wait: 30s
group_interval: 2m
repeat_interval: 1h
receiver: 'web.hook'
receivers:
- name: 'web.hook'
webhook_configs:
- url: 'http://localhost:8060/dingtalk/webhook1/send'
send_resolved: true
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
[root@master01 ~]# systemctl restart alertmanager && systemctl restart prometheus-webhook-dingtalk
6.安装node_exporter
# 下载安装包
[root@master01 ~]# wget https://github.com/prometheus/node_exporter/releases/download/v1.4.0-rc.0/node_exporter-1.4.0-rc.0.linux-amd64.tar.gz
# 解压重命名
[root@master01 ~]# tar xf node_exporter-1.4.0-rc.0.linux-amd64.tar.gz -C /usr/local
[root@master01 ~]# mv /usr/local/node_exporter-1.4.0-rc.0.linux-amd64/ /usr/local/node_exporter
[root@master01 ~]# groupadd prometheus && useradd -g prometheus -s /sbin/nologin prometheus
# 配置systemd管理
[root@master01 node_exporter]# cat /etc/systemd/system/node_exporter.service
[Unit]
Description=node_exporter
Documentation=https://prometheus.io/
After=network.target
[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/node_exporter/node_exporter
Restart=on-failure
[Install]
WantedBy=multi-user.target
# 启动服务
[root@master01 ~]# systemctl daemon-reload && systemctl enable --now node_exporter
[root@master01 ~]# systemctl status node_exporter
7.报警规则
[root@master01 prometheus]# cat rules/first_rules.yml
groups:
- name: 主机状态-监控告警
rules:
- alert: 主机状态
expr: up{job="node_exporter"} == 0
for: 1m
labels:
status: 非常严重
annotations:
summary: "{{$labels.instance}}:服务器宕机"
description: "{{$labels.instance}}:服务器宕机超过1分钟"
[root@master01 prometheus]# cat prometheus.yml
# my global config
global:
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
# scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
alertmanagers:
- static_configs:
- targets: ['localhost:9093']
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
- "/usr/local/prometheus/rules/first_rules.yml"
# - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: "prometheus"
# metrics_path defaults to '/metrics'
# scheme defaults to 'http'.
static_configs:
- targets: ["localhost:9090"]
- job_name: "node_exporter"
static_configs:
- targets: ["localhost:9100"]
- job_name: "alertmanager"
static_configs:
- targets: ["localhost:9093"]
# 重启下Prometheus
标签:二进制,master01,webhook,prometheus,grafana,Prometheus,usr,监控,root 来源: https://www.cnblogs.com/Applogize/p/16619222.html