Alerts


/etc/prometheus/rules/diskspace.yml > diskspace
low ZFS disk space (0 active)
low disk space (0 active)
# Alerting rule: a filesystem series from job "node" reports fewer than
# 1024^3 bytes (1 GiB) available. Mountpoints matching /boot (and anything
# beneath it), /var/lib/incus/guestapi, /var/lib/incus/shmounts and
# /var/lib/lxcfs are excluded by the mountpoint regex.
alert: low disk space
# Folded scalar: the two lines below are joined with a single space.
expr: >-
  node_filesystem_avail_bytes{job="node",mountpoint!~"/boot(/.*)?|/var/lib/incus/(guestapi|shmounts)|/var/lib/lxcfs"}
  < 1024 * 1024 * 1024
# The condition must hold for one minute before the alert fires.
for: 1m
annotations:
  # reReplaceAll removes the ":<digits>" port suffix from the instance label,
  # so the message reads "<host>:<mountpoint>".
  summary: >-
    Less than 1GiB of free space on
    {{ $labels.instance | reReplaceAll ":\\d+" "" }}:{{ $labels.mountpoint }}
/etc/prometheus/rules/prometheus.yml > prometheus
failed scrape targets (0 active)
# Alerting rule: yields one series per (job, instance) pair whose `up`
# sample equals 0.
alert: failed scrape targets
expr: count by (job, instance) (up == 0)
# The condition must hold for one minute before the alert fires.
for: 1m
annotations:
  # The instance label is shown unmodified here (no port stripping).
  summary: >-
    Prometheus job {{ $labels.job }}
    failed to scrape {{ $labels.instance }}
/etc/prometheus/rules/systemd.yml > systemd
failed units (1 active)
# Alerting rule: a node_systemd_unit_state series from job "node" with
# state="failed" has a value greater than 0.
alert: failed units
expr: node_systemd_unit_state{job="node",state="failed"} > 0
# The condition must hold for one minute before the alert fires.
for: 1m
annotations:
  # reReplaceAll removes the ":<digits>" port suffix from the instance label.
  summary: >-
    systemd unit {{ $labels.name }} failed on
    {{ $labels.instance | reReplaceAll ":\\d+" "" }}
Labels State Active Since Value
alertname="failed units" instance="lemmy.debian.social:9100" job="node" name="lemmy.service" state="failed" type="simple" firing 2025-11-16 18:22:45.036565074 +0000 UTC 1
is-system-running (1 active)
# Alerting rule: node_systemd_system_running for job "node" equals 0.
alert: is-system-running
expr: node_systemd_system_running{job="node"} == 0
# The condition must hold for one minute before the alert fires.
for: 1m
annotations:
  # reReplaceAll removes the ":<digits>" port suffix from the instance label.
  # Folded scalar: the three lines below are joined with single spaces.
  summary: >-
    systemd system
    {{ $labels.instance | reReplaceAll ":\\d+" "" }}
    is not running
Labels State Active Since Value
alertname="is-system-running" instance="lemmy.debian.social:9100" job="node" firing 2025-11-16 18:22:45.036565074 +0000 UTC 0