first commit
This commit is contained in:
commit
0cc555afd9
372
runchecks.yml
Normal file
372
runchecks.yml
Normal file
@ -0,0 +1,372 @@
|
||||
---
|
||||
- name: Checks
|
||||
hosts: all
|
||||
tasks:
|
||||
- name: Install Packages
|
||||
apt:
|
||||
name:
|
||||
- smartmontools
|
||||
update_cache: no
|
||||
install_recommends: no
|
||||
|
||||
- name: Create checks dir /usr/local/sbin/runchecks.d
|
||||
ansible.builtin.file:
|
||||
path: /usr/local/sbin/runchecks.d
|
||||
owner: root
|
||||
group: root
|
||||
state: directory
|
||||
mode: '0700'
|
||||
|
||||
- name: /usr/local/sbin/runchecks.sh
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.sh
|
||||
mode: "0500"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
. /etc/bash/gaboshlib.include
|
||||
g_lockfile
|
||||
g_nice
|
||||
g_all-to-syslog
|
||||
g_echo_ok "Starting $0"
|
||||
while true
|
||||
do
|
||||
g_echo "Waiting 5min"
|
||||
sleep 300
|
||||
g_echo "Next Loop"
|
||||
find /usr/local/sbin/runchecks.d -name "*.check" -type f | sort | while read check
|
||||
do
|
||||
g_echo "Running: $check"
|
||||
. "$check"
|
||||
done
|
||||
done
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/services
|
||||
lineinfile:
|
||||
path: /usr/local/sbin/runchecks.d/services
|
||||
create: yes
|
||||
insertbefore: EOF
|
||||
line: "cron rsyslog sshd"
|
||||
|
||||
- name: /usr/local/sbin/runchecks.sh shebang
|
||||
lineinfile:
|
||||
path: /usr/local/sbin/runchecks.sh
|
||||
insertbefore: BOF
|
||||
line: "#!/bin/bash"
|
||||
|
||||
- name: /etc/systemd/system/runchecks.service
|
||||
blockinfile:
|
||||
path: /etc/systemd/system/runchecks.service
|
||||
create: yes
|
||||
mode: 0444
|
||||
owner: root
|
||||
group: root
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
[Unit]
|
||||
Description=checks
|
||||
After=syslog.target network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=root
|
||||
ExecStart=/usr/local/sbin/runchecks.sh
|
||||
Restart=on-abort
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
notify:
|
||||
- Restart systemd
|
||||
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/disks.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/disks.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
for disk in $(find /dev/ \( -name "nvme?" -o -name "sd?" \) \( -type b -o -type c \))
|
||||
do
|
||||
# check if the disk is used and not suspended
|
||||
dev=$(basename $disk)
|
||||
[ -f ${g_tmp}/${dev}-stat ] || cat /sys/block/${dev}/stat | perl -pe 's/ +/ /g' | cut -d" " -f1,2,3,4,5,6,7,8 > ${g_tmp}/${dev}-stat
|
||||
cat /sys/block/${dev}/stat | perl -pe 's/ +/ /g' | cut -d" " -f1,2,3,4,5,6,7,8 > ${g_tmp}/${dev}-stat-now
|
||||
if diff ${g_tmp}/${dev}-stat-now ${g_tmp}/${dev}-stat >/dev/null 2>&1
|
||||
then
|
||||
cat ${g_tmp}/${dev}-stat-now >${g_tmp}/${dev}-stat
|
||||
# Check disk SMART
|
||||
OPT=""
|
||||
udevadm info --query=all -n $disk | grep -q usb- && OPT="-d sat"
|
||||
smartctl -H $OPT $disk >${g_tmp}/smartctl
|
||||
if ! cat ${g_tmp}/smartctl | egrep -iq "SMART overall-health self-assessment test result: PASSED|SMART Health Status: OK"
|
||||
then
|
||||
g_echo_error "SMART of $disk unhealthy: $(cat ${g_tmp}/smartctl)"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
validate: /bin/bash -n %s
|
||||
backup: yes
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/internet.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/internet.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
testip=9.9.9.9
|
||||
if ! ping -c3 ${testip} >/dev/null 2>&1
|
||||
then
|
||||
g_echo_error "No Internet connection? ping $testip failed!"
|
||||
fi
|
||||
validate: /bin/bash -n %s
|
||||
backup: yes
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/processes.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/processes.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
# process check
|
||||
mkdir -p ${g_tmp}/proc-check
|
||||
# Ggf wieder OK-Prozesse löschen
|
||||
ls ${g_tmp}/proc-check | while read odzpid
|
||||
do
|
||||
if ! egrep -q "^State:.+D \(|State:.+Z \(|State:.+R \(" /proc/$odzpid/status 2>/dev/null
|
||||
then
|
||||
rm ${g_tmp}/proc-check/$odzpid
|
||||
fi
|
||||
done
|
||||
# Neue oder alte Finden und bei Status älter als 30 Minuten melden
|
||||
egrep "^State:.+D \(|State:.+Z \(|State:.+R \(" /proc/[0-9]*/status | cut -d/ -f3 | while read dzpid
|
||||
do
|
||||
if [ -f ${g_tmp}/proc-check/$dzpid ]
|
||||
then
|
||||
timestamp=$(ls --full-time ${g_tmp}/proc-check/$dzpid | sed 's/ */ /g' | cut -d" " -f6,7)
|
||||
date >>${g_tmp}/proc-check/$dzpid
|
||||
touch -d "$timestamp" ${g_tmp}/proc-check/$dzpid
|
||||
if find ${g_tmp}/proc-check/$dzpid -mmin +30 | grep -q $dzpid
|
||||
then
|
||||
ps aufx | grep $dzpid | grep -v grep >${g_tmp}/proc-check-$dzpid.notify
|
||||
pstree $dzpid >>${g_tmp}/proc-check-$dzpid.notify
|
||||
cat ${g_tmp}/proc-check/$dzpid >>${g_tmp}/proc-check-$dzpid.notify
|
||||
g_echo_error "$(cat ${g_tmp}/proc-check-$dzpid.notify)"
|
||||
fi
|
||||
else
|
||||
date >${g_tmp}/proc-check/$dzpid
|
||||
fi
|
||||
done
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/thermal.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/thermal.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
if [ -f /sys/class/thermal/thermal_zone0/temp ]
|
||||
then
|
||||
let temp=`cat /sys/class/thermal/thermal_zone0/temp`/1000
|
||||
if [ $temp -gt 75 ]
|
||||
then
|
||||
g_echo_error "Server temperature high $temp°C"
|
||||
fi
|
||||
fi
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/measuretemp.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/measuretemp.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
if which vcgencmd >/dev/null 2&>1
|
||||
then
|
||||
temp=$(vcgencmd measure_temp | grep ^temp= | cut -d"=" -f2 | cut -d. -f1)
|
||||
if [ $temp -gt 75 ]
|
||||
then
|
||||
g_echo_error "Server Temperatur zu hoch $temp°C"
|
||||
fi
|
||||
fi
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/load.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/load.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
let maxload=$(cat /proc/cpuinfo | grep ^processor | wc -l)+10
|
||||
load=`cat /proc/loadavg | cut -d" " -f1`
|
||||
intload=`echo $load | cut -d'.' -f1`
|
||||
if [ $intload -gt $maxload ]
|
||||
then
|
||||
g_echo_error "System Load high at $load"
|
||||
fi
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/services.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/services.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
for service in $(cat /usr/local/sbin/runchecks.d/services | sort)
|
||||
do
|
||||
if ! ps aux | grep -v grep | grep -q "$service"
|
||||
then
|
||||
g_echo_error "$service down"
|
||||
fi
|
||||
done
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/df.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/df.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
df -al | egrep -v ' - |Mounted on|/rom' | sed 's/ */ /g; s/\%//' | cut -d" " -f1,5 | sort -u | while read i
|
||||
do
|
||||
disk=`echo $i | cut -d" " -f1`
|
||||
usa=`echo $i | cut -d" " -f2`
|
||||
if [ $usa -gt 88 ]
|
||||
then
|
||||
g_echo_error "Space of $disk full: $usa%"
|
||||
fi
|
||||
done
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/dfinodes.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/dfinodes.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
df -ali | egrep -v ' - |Mounted on|/rom' | sed 's/ */ /g; s/\%//' | cut -d" " -f1,5 | sort -u | while read i
|
||||
do
|
||||
disk=`echo $i | cut -d" " -f1`
|
||||
usa=`echo $i | cut -d" " -f2`
|
||||
if [ $usa -gt 88 ]
|
||||
then
|
||||
g_echo_error "Inodes if $disk full: $usa%"
|
||||
fi
|
||||
done
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/freemem.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/freemem.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
mem=`free | grep "^Mem:" | sed 's/ */ /g' | cut -d" " -f 4`
|
||||
if [ $mem -lt 20480 ]
|
||||
then
|
||||
g_echo_error "Memory full! Free: ${mem}kB"
|
||||
fi
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: /usr/local/sbin/runchecks.d/freeswap.check
|
||||
blockinfile:
|
||||
path: /usr/local/sbin/runchecks.d/freeswap.check
|
||||
mode: "0400"
|
||||
owner: root
|
||||
group: root
|
||||
create: yes
|
||||
marker: "# {mark} ANSIBLE MANAGED BLOCK"
|
||||
block: |
|
||||
if ! [ $(free | grep "^Swap:" | sed 's/ */ /g' | cut -d" " -f 2) == 0 ]
|
||||
then
|
||||
mem=`free | grep "^Swap:" | sed 's/ */ /g' | cut -d" " -f 4`
|
||||
if [ $mem -lt 50000 ]
|
||||
then
|
||||
g_echo_error "Swap full! Free: ${mem}kB"
|
||||
fi
|
||||
fi
|
||||
backup: yes
|
||||
validate: /bin/bash -n %s
|
||||
notify:
|
||||
- Restart runchecks
|
||||
|
||||
- name: 'add runchecks to startup'
|
||||
command: systemctl enable runchecks
|
||||
args:
|
||||
creates: /etc/systemd/system/multi-user.target.wants/runchecks.service
|
||||
|
||||
|
||||
handlers:
|
||||
|
||||
- name: Restart systemd
|
||||
ansible.builtin.shell: systemctl daemon-reload
|
||||
notify: Restart runchecks
|
||||
|
||||
- name: Restart runchecks
|
||||
service:
|
||||
name: runchecks
|
||||
state: restarted
|
||||
|
Loading…
Reference in New Issue
Block a user