示例 - 对主机进行监控告警
准备条件
定义告警规则
# prometheus.yml (excerpt): register the rule file so Prometheus loads it.
rule_files:
  - /etc/prometheus/rule_files/host.yml
---
# /etc/prometheus/rule_files/host.yml
# Alerting rules for host-level CPU, memory, and disk usage.
# Every rule must hold for 5 minutes ("for: 5m") before the alert fires.
# NOTE(review): the label key was spelled "serverity" in the original; it is
# corrected to "severity" here. Any Alertmanager route/matcher that relied on
# the misspelled key must be updated accordingly.
groups:
  - name: Host
    rules:
      # CPU usage in percent per instance: 100 * (1 - average idle rate).
      - alert: HostCPU
        expr: 100 * (1 - avg by (instance) (irate(node_cpu_seconds_total{mode="idle"}[2m]))) > 80
        for: 5m
        labels:
          severity: high
        annotations:
          summary: "{{$labels.instance}}: High CPU Usage Detected"
          description: "{{$labels.instance}}: CPU usage is {{$value}}, above 80%"

      # Memory usage in percent: (total - available) / total * 100.
      - alert: HostMemory
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 80
        for: 5m
        labels:
          severity: middle
        annotations:
          summary: "{{$labels.instance}}: High Memory Usage Detected"
          description: "{{$labels.instance}}: Memory Usage is {{ $value }}, above 80%"

      # Disk usage in percent on xfs/ext4 filesystems: (size - avail) / size.
      # The node_exporter metric is node_filesystem_avail_bytes (not
      # "..._available_bytes"); with the wrong name the expression matches no
      # series and the alert can never fire.
      - alert: HostDisk
        expr: 100 * (node_filesystem_size_bytes{fstype=~"xfs|ext4"} - node_filesystem_avail_bytes) / node_filesystem_size_bytes > 80
        for: 5m
        labels:
          severity: low
        annotations:
          summary: "{{$labels.instance}}: High Disk Usage Detected"
          description: "{{$labels.instance}}, mountpoint {{$labels.mountpoint}}: Disk Usage is {{ $value }}, above 80%"



Last updated