Prometheus监控(三)—— 钉钉和企业微信告警
生活随笔
收集整理的這篇文章主要介紹了
Prometheus监控(三)—— 钉钉和企业微信告警
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
一、prometheus 實現釘釘和企業微信告警
基礎流程
1.1 釘釘通知
alertmanager基礎設置可以參照: https://editor.csdn.net/md/?articleId=121845743
釘釘群設置
群設置 -> 智能群助手 -> 添加機器人 -> 自定義 -> 添加 -> 保存生成的webhook地址
1.1.1 測試發送信息 - 關鍵字認證
root@prometheus:~# mkdir data/scripts -p
root@prometheus:~# cd data/scripts/
root@prometheus:~/data/scripts# vim dinngding-keyworlds.sh
#!/bin/bash
source /etc/profile
MESSAGE=$1
curl -X "POST" '你生成的Webhook地址' \
-H 'Content-Type:application/json' \
-d '{ "msgtype" : "text","text" : {"content":"'${MESSAGE}'"} }'
root@prometheus:~/data/scripts# chmod +x dinngding-keyworlds.sh
root@prometheus:~/data/scripts# bash dinngding-keyworlds.sh "namespace=default\npod=pod1\ncpu=90%\n持續時間=8s\nalertname=pod"
{"errcode":0,"errmsg":"ok"}
1.1.1.1 部署webhook-dingtalk
github地址 : https://github.com/timonwong/prometheus-webhook-dingtalk
root@prometheus:/apps# wget https://github.com/timonwong/prometheus-webhook-dingtalk/releases/download/v1.4.0/prometheus-webhook-dingtalk-1.4.0.linux-amd64.tar.gz root@prometheus:/apps# tar xf prometheus-webhook-dingtalk-1.4.0.linux-amd64.tar.gz root@prometheus:/apps/prometheus-webhook-dingtalk-1.4.0.linux-amd64# ./prometheus-webhook-dingtalk --web.listen-address="0.0.0.0:8060" --ding.profile="alertname=你的webhook地址" level=info ts=2022-02-10T03:49:21.015Z caller=main.go:62 msg="Starting prometheus-webhook-dingtalk" version="(version=1.4.0, branch=HEAD, revision=02fe8265a98ab4caaa78ebbed209d3f06b87b4a6)" level=info ts=2022-02-10T03:49:21.016Z caller=main.go:63 msg="Build context" (gogo1.13.5,userroot@eb9f8d8f0437,date20191211-03:00:38)=(MISSING) level=warn ts=2022-02-10T03:49:21.016Z caller=main.go:105 msg="DEPRECATION: Detected one of the following flags: --ding.profile, --ding.timeout, --template.file" level=warn ts=2022-02-10T03:49:21.016Z caller=main.go:106 msg="DEPRECATION: Now working in compatibility mode, please consider upgrading your configurations" level=info ts=2022-02-10T03:49:21.016Z caller=main.go:117 component=configuration msg="Loading templates" templates= ts=2022-02-10T03:49:21.016Z caller=main.go:133 component=configuration msg="Webhook urls for prometheus alertmanager" urls=http://0.0.0.0:8060/dingtalk/alertname/send level=info ts=2022-02-10T03:49:21.016Z caller=web.go:210 component=web msg="Start listening for connections" address=0.0.0.0:8060 level=info ts=2022-02-10T03:49:21.428Z caller=entry.go:22 component=web http_scheme=http http_proto=HTTP/1.1 http_method=POST remote_addr=10.0.0.61:55076 user_agent=Alertmanager/0.23.0 uri=http://10.0.0.61:8060/dingtalk/alertname/send resp_status=200 resp_bytes_length=2 resp_elapsed_ms=184.460895 msg="request complete"#測試一下 root@prometheus:~# telnet 10.0.0.61 8060 Trying 10.0.0.61... Connected to 10.0.0.61. 
Escape character is '^]'.HTTP/1.1 400 Bad Request Content-Type: text/plain; charset=utf-8 Connection: close400 Bad RequestConnection closed by foreign host.1.1.1.2 配置alertmanager
root@prometheus:/apps/alertmanager# vim alertmanager.yml
---
#修改接收者
receiver: 'dingding'
receivers:
#添加釘釘
- name: dingding
  webhook_configs:
  - url: 'http://10.0.0.61:8060/dingtalk/alertname/send'
    send_resolved: true
1.1.1.3 驗證
1.1.2 測試發送信息 - 加簽認證
1.1.2.1 配置加簽
1.1.2.2 加簽認證-獲取認證
root@prometheus:/apps/alertmanager# apt install python2 root@prometheus:~# vim data/scripts/dingding-label-sign.py #!/usr/bin/python2.7 import time import hmac import hashlib import base64 import urllib timestamp=long(round(time.time())*1000) secret='你的加簽生成的秘鑰' secret_enc=bytes(secret).encode('utf-8') string_to_sign='{}\n{}'.format(timestamp,secret) string_to_sign_enc=bytes(string_to_sign).encode('utf-8') hmac_code=hmac.new(secret_enc,string_to_sign_enc,digestmod=hashlib.sha256).digest() sign=urllib.quote_plus(base64.b64encode(hmac_code)) print(timestamp) print(sign) #生成時間戳和認證 root@prometheus:~# python2.7 data/scripts/dingding-label-sign.py1.1.2.3 消息發送腳本
#測試腳本可用
root@prometheus:~# vim /root/data/scripts/dingding-label-send.sh
#!/bin/bash
source /etc/profile
MESSAGE=$1
secret='你的加簽生成的秘鑰'
getkey=$(python2.7 /root/data/scripts/dingding-label-sign.py)
timestamp=${getkey:0:13}
sign=$(echo "${getkey:13:100}"|tr -d '\n')
# DateStamp=$(date -d @${getkey:0:10}"+%F%H:%m:%S")
curl -X "POST" "你的webhook地址&timestamp=${timestamp}&sign=${sign}" \
-H 'Content-Type:application/json' \
-d '{ "msgtype" : "text","text" : {"content":"'${MESSAGE}'"} }'
root@prometheus:~# bash /root/data/scripts/dingding-label-send.sh sss
{"errcode":0,"errmsg":"ok"}
1.1.2.4 webhook啟動
#先獲取當前時間戳和認證秘鑰
root@prometheus:~# python2.7 /root/data/scripts/dingding-label-sign.py
#啟動webhook的dingtalk
root@prometheus:/apps/prometheus-webhook-dingtalk-1.4.0.linux-amd64# ./prometheus-webhook-dingtalk --web.listen-address="0.0.0.0:8060" --ding.profile="alertname=你的webhook地址&timestamp=生成的時間戳&sign=生成的認證秘鑰"
level=info ts=2022-02-10T05:22:45.778Z caller=main.go:62 msg="Starting prometheus-webhook-dingtalk" version="(version=1.4.0, branch=HEAD, revision=02fe8265a98ab4caaa78ebbed209d3f06b87b4a6)"
level=info ts=2022-02-10T05:22:45.778Z caller=main.go:63 msg="Build context" (gogo1.13.5,userroot@eb9f8d8f0437,date20191211-03:00:38)=(MISSING)
level=warn ts=2022-02-10T05:22:45.779Z caller=main.go:105 msg="DEPRECATION: Detected one of the following flags: --ding.profile, --ding.timeout, --template.file"
level=warn ts=2022-02-10T05:22:45.779Z caller=main.go:106 msg="DEPRECATION: Now working in compatibility mode, please consider upgrading your configurations"
level=info ts=2022-02-10T05:22:45.779Z caller=main.go:117 component=configuration msg="Loading templates" templates= ts=2022-02-10T05:22:45.779Z caller=main.go:133 component=configuration msg="Webhook urls for prometheus alertmanager" urls=http://0.0.0.0:8060/dingtalk/alertname/send
level=info ts=2022-02-10T05:22:45.779Z caller=web.go:210 component=web msg="Start listening for connections" address=0.0.0.0:8060
level=info ts=2022-02-10T05:22:46.788Z caller=entry.go:22 component=web http_scheme=http http_proto=HTTP/1.1 http_method=POST remote_addr=10.0.0.61:59396 user_agent=Alertmanager/0.23.0 uri=http://10.0.0.61:8060/dingtalk/alertname/send resp_status=200 resp_bytes_length=2 resp_elapsed_ms=908.904779 msg="request complete"
1.1.2.5 進行驗證
1.2 企業微信通知
1.2.1 創建應用
登錄pc的企業微信 -> 應用管理 ->創建應用
1.2.2 測試發送信息
1.2.3 驗證測試信息
1.2.4 alertmanager配置
#修改配置文件 root@prometheus:/apps/alertmanager# vim alertmanager.yml --- route:group_by: ['alertname']group_wait: 10sgroup_interval: 2srepeat_interval: 2m#receiver: 'web.hook'#receiver: dingdingreceiver: wechat ---- - name: wechatwechat_configs:- corp_id: 你的企業IDto_user: '@all' #發送給所有人agent_id: 你的應用idapi_secret: 你的應用秘鑰send_resolved: true1.2.5 驗證信息
1.2.6 消息發送給指定組
1.2.6.1 獲取部門ID
1.2.6.2 alertmanager配置
root@prometheus:/apps/alertmanager# vim alertmanager.yml - name: wechatwechat_configs:- corp_id: 你的企業ID#to_user: '@all'to_party: 1 #指定部門IDagent_id: 你的應用IDapi_secret: 你的應用secretsend_resolved: true root@prometheus:/apps/alertmanager# systemctl restart alertmanager1.2.6.3 驗證信息
1.3 消息分類發送
根據消息中的屬性信息設置規則,將消息分類發送,如將severity級別為critical的通知信息發送到郵箱,其他發送到微信
1.3.1 alertmanager設置
route:group_by: ['alertname']group_wait: 10sgroup_interval: 2srepeat_interval: 2m#receiver: 'web.hook'#receiver: dingdingreceiver: wechatroutes: #添加信息路由- receiver: web.hook #critical級別的信息發送到郵箱group_wait: 10smatch_re:severity: critical1.3.2 驗證信息
1.4 自定義消息模板
默認的消息內容需要調整,而且信息是連接在一起的
1.4.1 定義模板
root@prometheus:/apps/alertmanager# vim alertmanager-wechat.tmpl {{ define "wechat.default.message" }} {{- if gt (len .Alerts.Firing) 0 -}} {{- range $index, $alert := .Alerts -}}========= 監控告警 ========= 告警程序: Alertmanager 告警類型: {{ $alert.Labels.alertname }} 告警級別: {{ $alert.Labels.severity }} 級 告警狀態: {{ .Status }} 故障主機: {{ $alert.Labels.instance }} {{ $alert.Labels.device }} 告警主題: {{ .Annotations.summary }} 告警詳情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description}} 主機標簽: {{ range .Labels.SortedPairs }} [{{ .Name }}: {{ .Value | html }} ] {{- end }} 故障時間: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }} ========= = end = ========= {{- end }} {{- end }}{{- if gt (len .Alerts.Resolved) 0 -}} {{- range $index, $alert := .Alerts -}}========= 告警恢復 ========= 告警程序: Alertmanager 告警主題: {{ $alert.Annotations.summary }} 告警主機: {{ .Labels.instance }} 告警類型: {{ .Labels.alertname }} 告警級別: {{ $alert.Labels.severity }} 級 告警狀態: {{ .Status }} 告警詳情: {{ $alert.Annotations.message }}{{ $alert.Annotations.description}} 故障時間: {{ ($alert.StartsAt.Add 28800e9).Format "2006-01-02 15:04:05" }} 恢復時間: {{ ($alert.EndsAt.Add 28800e9).Format "2006-01-02 15:04:05" }} ========= = end = ========= {{- end }} {{- end }} {{- end }}1.4.2 alertmanager引用模板
root@prometheus:/apps/alertmanager# vim alertmanager.yml --- #添加模板 templates:- /apps/alertmanager/alertmanager-wechat.tmplroot@prometheus:/apps/alertmanager# systemctl restart alertmanager1.5 告警抑制和靜默
1.5.1 告警抑制
基于告警規則,超過80%就不再發60%的告警,即由60%的表達式觸發的告警被抑制了
root@prometheus:/apps/prometheus# vim rules.yml groups: - name: altermanager_pod.rulesrules:- alert: 磁盤容量expr: 100-(node_filesystem_free_bytes{fstype=~"ext4|xfs"}/node_filesystem_size_bytes{fstype=~"ext4|xfs"}*100)>30 #故意寫小for: 2slabels:severity: criticalannotations:description: "{{$labels.mountpoint}} 磁盤分區使用大于30%(目前使用:{{$value}}%)"summary: "{{$labels.mountpoint}} 磁盤分區使用率過高!"- alert: 磁盤容量expr: 100-(node_filesystem_free_bytes{fstype=~"ext4|xfs"}/node_filesystem_size_bytes{fstype=~"ext4|xfs"}*100)>20 #故意寫小for: 2slabels:severity: warningannotations:description: "{{$labels.mountpoint}} 磁盤分區使用大于20%(目前使用:{{$value}}%)"summary: "{{$labels.mountpoint}} 磁盤分區使用率過高!" root@prometheus:/apps/prometheus# systemctl restart prometheus.service root@prometheus:/apps/prometheus# systemctl restart alertmanager.service進行驗證
1.5.2 手動靜默
先找到要靜默的告警事件,然后手動靜默指定的事件
1.5.2.1 點擊靜默
1.5.2.2 填寫信息并創建
1.5.2.3 查看并驗證
進行驗證
總結
以上是生活随笔為你收集整理的Prometheus监控(三)—— 钉钉和企业微信告警的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 蚂蚁移动开发平台mPaaS:金融业务增长
- 下一篇: 在你的项目中集成 Android pay