File tree Expand file tree Collapse file tree 3 files changed +37
-0
lines changed
Expand file tree Collapse file tree 3 files changed +37
-0
lines changed Original file line number Diff line number Diff line change @@ -24,6 +24,24 @@ groups:
2424 summary: "Prometheus exporter at {{ $labels.instance }} reports low memory"
2525 description: "Available memory is {{ $value }} GiB."
2626
27+ - alert: LowSwapSpace
28+ expr: (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes) < {% endraw %}{{ alertmanager_node_free_swap_warning_threshold_ratio }}{% raw %}
29+ for: 1m
30+ labels:
31+ severity: warning
32+ annotations:
33+ summary: "Swap space at {{ $labels.instance }} reports low memory"
34+ description: "Available swap space is {{ $value | humanizePercentage }}. Running out of swap space causes OOM Kills."
35+
36+ - alert: LowSwapSpace
37+ expr: (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes) < {% endraw %}{{ alertmanager_node_free_swap_critical_threshold_ratio }}{% raw %}
38+ for: 1m
39+ labels:
40+ severity: critical
41+ annotations:
42+ summary: "Swap space at {{ $labels.instance }} reports low memory"
43+ description: "Available swap space is {{ $value | humanizePercentage }}. Running out of swap space causes OOM Kills."
44+
2745 - alert: HostOomKillDetected
2846 expr: increase(node_vmstat_oom_kill[5m]) > 0
2947 for: 5m
Original file line number Diff line number Diff line change @@ -12,6 +12,12 @@ alertmanager_low_memory_threshold_gib: 5
1212# link. Change to false to disable this alert.
1313alertmanager_warn_network_bond_single_link : true
1414
15+ # Thresholds to trigger a LowSwapSpace alert on swap space depletion (ratio).
16+ # When the ratio of free swap space to total swap space drops below one of
17+ # these values, the warning or critical alert is triggered, respectively.
18+ alertmanager_node_free_swap_warning_threshold_ratio : 0.25
19+ alertmanager_node_free_swap_critical_threshold_ratio : 0.1
20+
1521# ##############################################################################
1622# Exporter configuration
1723
Original file line number Diff line number Diff line change 1+ ---
2+ features :
3+ - |
4+ Added two alerts (warning and critical) that are triggered when the ratio
5+ of (free_swap_space / total_swap_space) is below the configured thresholds.
6+ Each threshold can be modified by altering the value of
7+ ``alertmanager_node_free_swap_warning_threshold_ratio`` and
8+ ``alertmanager_node_free_swap_critical_threshold_ratio``.
9+
10+ Currently this solution has the limitation of being a one-size-fits-all
11+ policy. This can cause unwanted alerts for hosts which utilise swap heavily.
12+ Therefore it is recommended to tune the thresholds or apply silence rules
13+ as needed.
You can’t perform that action at this time.
0 commit comments