#!/bin/bash
#
# CheckMK local check for IBM Spectrum Scale CES (Cluster Export Services)
# File: /usr/lib/check_mk_agent/local/mmces_state
# Make executable: chmod +x /usr/lib/check_mk_agent/local/mmces_state
#
# Creates a separate service for each CES service
#

# Path to the mmces command
MMCES_CMD="/usr/lpp/mmfs/bin/mmces"
HOSTNAME=$(hostname)

# Check that mmces is available. The script always exits 0 here; the problem
# is reported through the state column (2) of the emitted service line.
if [[ ! -x "$MMCES_CMD" ]]; then
    echo "2 MMCES_Global - CRITICAL: mmces command not found at $MMCES_CMD"
    exit 0
fi

# Run "mmces state show" for this node. stderr is merged into the captured
# text, exactly as before.
OUTPUT=$("$MMCES_CMD" state show -N "$HOSTNAME" 2>&1)
EXIT_CODE=$?

if (( EXIT_CODE != 0 )); then
    echo "2 MMCES_Global - CRITICAL: mmces command failed with exit code $EXIT_CODE"
    exit 0
fi

# Locate the header row (first field is exactly "NODE") and the first data
# row. Blank lines and lines starting with "-" are skipped. The header is
# matched on the whole first field, so a host whose name merely begins with
# "NODE" is still treated as data.
HEADER_LINE=""
DATA_LINE=""
while IFS= read -r LINE; do
    read -r FIRST_FIELD REST <<< "$LINE"
    if [[ -z "$FIRST_FIELD" || "$LINE" == -* ]]; then
        continue
    elif [[ "$FIRST_FIELD" == "NODE" ]]; then
        if [[ -z "$HEADER_LINE" ]]; then
            HEADER_LINE=$LINE
        fi
    else
        DATA_LINE=$LINE
        break
    fi
done <<< "$OUTPUT"

if [[ -z "$DATA_LINE" ]]; then
    echo "2 MMCES_Global - CRITICAL: No data found in mmces output"
    exit 0
fi

# Extract the fields (whitespace-separated). Positional assignment is the
# fallback when no usable header row was found.
read -r NODE AUTH BLOCK NETWORK HDFS_NAMENODE AUTH_OBJ NFS OBJ S3 SMB CES <<< "$DATA_LINE"

# When a header row with the same number of columns as the data row exists,
# assign by column name instead, so a release that prints a different set of
# columns cannot shift states into the wrong variables. Columns missing from
# the header leave their variable empty.
read -r -a HEADER_FIELDS <<< "$HEADER_LINE"
read -r -a DATA_FIELDS <<< "$DATA_LINE"
if (( ${#HEADER_FIELDS[@]} > 0 && ${#HEADER_FIELDS[@]} == ${#DATA_FIELDS[@]} )); then
    NODE="" AUTH="" BLOCK="" NETWORK="" HDFS_NAMENODE="" AUTH_OBJ=""
    NFS="" OBJ="" S3="" SMB="" CES=""
    for i in "${!HEADER_FIELDS[@]}"; do
        # Only known column names may be used as variable names.
        case "${HEADER_FIELDS[i]}" in
            NODE|AUTH|BLOCK|NETWORK|HDFS_NAMENODE|AUTH_OBJ|NFS|OBJ|S3|SMB|CES)
                printf -v "${HEADER_FIELDS[i]}" '%s' "${DATA_FIELDS[i]}"
                ;;
        esac
    done
fi

# Convert a CES state string into a CheckMK state code.
# Arguments: $1 - state string (e.g. HEALTHY); matching is case-sensitive
# Outputs:   0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN) on stdout;
#            any unrecognised or empty state yields 3
get_status_code() {
    case "$1" in
        "HEALTHY")                    echo "0" ;;  # OK
        "DISABLED")                   echo "0" ;;  # OK (DISABLED is normal)
        "DEGRADED"|"WARNING")         echo "1" ;;  # WARNING
        "UNHEALTHY"|"FAILED"|"ERROR") echo "2" ;;  # CRITICAL
        *)                            echo "3" ;;  # UNKNOWN
    esac
}

# Convert a CES state string into the status label used in the check output.
# Arguments: $1 - state string (e.g. HEALTHY); matching is case-sensitive
# Outputs:   "OK", "OK (Disabled)", "WARNING", "CRITICAL" or "UNKNOWN" on
#            stdout; any unrecognised or empty state yields "UNKNOWN"
get_status_text() {
    case "$1" in
        "HEALTHY")                    echo "OK" ;;
        "DISABLED")                   echo "OK (Disabled)" ;;
        "DEGRADED"|"WARNING")         echo "WARNING" ;;
        "UNHEALTHY"|"FAILED"|"ERROR") echo "CRITICAL" ;;
        *)                            echo "UNKNOWN" ;;
    esac
}

# Convert a CES state string into a numeric value for performance data
# (graphing).
# Arguments: $1 - state string (e.g. HEALTHY); matching is case-sensitive
# Outputs:   1 (HEALTHY), 0 (DISABLED), 0.5 (DEGRADED/WARNING),
#            -1 (UNHEALTHY/FAILED/ERROR) or -2 (anything else) on stdout
get_perf_value() {
    case "$1" in
        "HEALTHY")                    echo "1" ;;
        "DISABLED")                   echo "0" ;;
        "DEGRADED"|"WARNING")         echo "0.5" ;;
        "UNHEALTHY"|"FAILED"|"ERROR") echo "-1" ;;
        *)                            echo "-2" ;;
    esac
}

# Create a separate service for each CES service.
# Output line format (Checkmk local check):
#   STATUS_CODE "SERVICE NAME" METRIC=VALUE STATUS_TEXT: Details
# The metric has to be in the third column; a "| name=value" suffix in the
# details is treated as plain text and is never recorded as a metric.

# Print the local-check line for one CES component.
# Globals:   HOSTNAME (read)
# Arguments: $1 - component label used in the service name (e.g. NFS)
#            $2 - state string reported for the component
#            $3 - human-readable subject of the detail sentence
#            $4 - metric name
# Outputs:   one local-check line on stdout
report_service() {
    local label=$1 state=$2 description=$3 metric=$4
    local status_code status_text perf_value

    status_code=$(get_status_code "$state")
    status_text=$(get_status_text "$state")
    perf_value=$(get_perf_value "$state")

    echo "$status_code \"GPFS CES STATE $label $HOSTNAME\" $metric=$perf_value $status_text: $description is $state"
}

report_service "AUTH"          "$AUTH"          "Authentication service"        "auth"
report_service "BLOCK"         "$BLOCK"         "Block service"                 "block"
report_service "NETWORK"       "$NETWORK"       "Network service"               "network"
report_service "HDFS_NAMENODE" "$HDFS_NAMENODE" "HDFS NameNode service"         "hdfs_namenode"
report_service "AUTH_OBJ"      "$AUTH_OBJ"      "Authentication Object service" "auth_obj"
report_service "NFS"           "$NFS"           "NFS service"                   "nfs"
report_service "OBJ"           "$OBJ"           "Object service"                "obj"
report_service "S3"            "$S3"            "S3 service"                    "s3"
report_service "SMB"           "$SMB"           "SMB service"                   "smb"
# Overall CES state
report_service "CES"           "$CES"           "CES overall status"            "ces"