#!/bin/bash
#
# CheckMK local check for IBM Spectrum Scale CES (Cluster Export Services)
# File: /usr/lib/check_mk_agent/local/mmces_state
# Make executable: chmod +x /usr/lib/check_mk_agent/local/mmces_state
#
# Creates a separate service for each CES component
#

# Location of the mmces binary and the node name that is passed to it.
MMCES_CMD="/usr/lpp/mmfs/bin/mmces"
HOSTNAME=$(hostname)

# Stop early with a CRITICAL "MMCES_Global" line when mmces is missing or not
# executable. The script still exits 0; the severity is carried by the
# leading "2" of the emitted line.
[[ -x "$MMCES_CMD" ]] || {
  echo "2 MMCES_Global - CRITICAL: mmces command not found at $MMCES_CMD"
  exit 0
}

# Run "mmces state show" for this node. stdout and stderr are captured
# together in OUTPUT; the command's exit status is kept in EXIT_CODE.
# The command path is quoted so it is always executed as a single word.
OUTPUT=$("$MMCES_CMD" state show -N "$HOSTNAME" 2>&1)
EXIT_CODE=$?

if (( EXIT_CODE != 0 )); then
  # Attach the first non-blank line of the command output (if there is one)
  # so the reason for the failure is visible in the service output. Only a
  # single line is used because each service is reported on one line.
  FAILURE_DETAIL=""
  while IFS= read -r output_line; do
    if [[ -n "${output_line//[[:space:]]/}" ]]; then
      FAILURE_DETAIL=$output_line
      break
    fi
  done <<< "$OUTPUT"

  echo "2 MMCES_Global - CRITICAL: mmces command failed with exit code $EXIT_CODE${FAILURE_DETAIL:+ ($FAILURE_DETAIL)}"
  exit 0
fi

# Locate the header row (first field is exactly "NODE") and the first data
# row in OUTPUT. Blank lines and separator rows starting with "-" are
# ignored. Anything seen before the header (e.g. messages from stderr, which
# is merged into OUTPUT) is discarded once the header is found. Matching the
# header on the whole first field, not on a "NODE" prefix, keeps data rows of
# nodes whose name merely starts with "NODE".
HEADER_LINE=""
DATA_LINE=""
while IFS= read -r output_line; do
  [[ -z "${output_line//[[:space:]]/}" ]] && continue
  [[ "$output_line" == -* ]] && continue

  read -r first_field _ <<< "$output_line"
  if [[ -z "$HEADER_LINE" && "$first_field" == "NODE" ]]; then
    HEADER_LINE=$output_line
    DATA_LINE=""
    continue
  fi

  if [[ -z "$DATA_LINE" ]]; then
    DATA_LINE=$output_line
    # With a header already seen this is the row directly below it.
    [[ -n "$HEADER_LINE" ]] && break
  fi
done <<< "$OUTPUT"

if [[ -z "$DATA_LINE" ]]; then
  echo "2 MMCES_Global - CRITICAL: No data found in mmces output"
  exit 0
fi

# Assign the whitespace-separated values to one variable per column.
# When a header row is present and has as many fields as the data row, values
# are matched to variables by column name, so a header that lists the columns
# in another order or omits some of them does not shift values into the wrong
# variable. Columns missing from the header leave their variable empty.
# Otherwise the fixed column order below is assumed.
NODE="" AUTH="" BLOCK="" NETWORK="" HDFS_NAMENODE="" AUTH_OBJ=""
NFS="" OBJ="" S3="" SMB="" CES=""

read -r -a header_fields <<< "$HEADER_LINE"
read -r -a data_fields <<< "$DATA_LINE"

if (( ${#header_fields[@]} > 0 && ${#header_fields[@]} == ${#data_fields[@]} )); then
  for field_index in "${!header_fields[@]}"; do
    field_value=${data_fields[field_index]}
    case "${header_fields[field_index]}" in
      NODE)          NODE=$field_value ;;
      AUTH)          AUTH=$field_value ;;
      BLOCK)         BLOCK=$field_value ;;
      NETWORK)       NETWORK=$field_value ;;
      HDFS_NAMENODE) HDFS_NAMENODE=$field_value ;;
      AUTH_OBJ)      AUTH_OBJ=$field_value ;;
      NFS)           NFS=$field_value ;;
      OBJ)           OBJ=$field_value ;;
      S3)            S3=$field_value ;;
      SMB)           SMB=$field_value ;;
      CES)           CES=$field_value ;;
    esac
  done
else
  read -r NODE AUTH BLOCK NETWORK HDFS_NAMENODE AUTH_OBJ NFS OBJ S3 SMB CES <<< "$DATA_LINE"
fi

#######################################
# Translate a CES component state into a CheckMK status code.
# Arguments: $1 - state string
# Outputs:   0 for HEALTHY and DISABLED (a disabled component counts as OK),
#            1 for DEGRADED or WARNING,
#            2 for UNHEALTHY, FAILED or ERROR,
#            3 for anything else
#######################################
get_status_code() {
  local state=$1
  local code=3

  if [[ "$state" == "HEALTHY" || "$state" == "DISABLED" ]]; then
    code=0
  elif [[ "$state" == "DEGRADED" || "$state" == "WARNING" ]]; then
    code=1
  elif [[ "$state" == "UNHEALTHY" || "$state" == "FAILED" || "$state" == "ERROR" ]]; then
    code=2
  fi

  printf '%s\n' "$code"
}

#######################################
# Translate a CES component state into the status word shown in the
# service output.
# Arguments: $1 - state string
# Outputs:   "OK", "OK (Disabled)", "WARNING", "CRITICAL" or "UNKNOWN"
#######################################
get_status_text() {
  local state=$1

  if [[ "$state" == "HEALTHY" ]]; then
    echo "OK"
    return
  fi
  if [[ "$state" == "DISABLED" ]]; then
    echo "OK (Disabled)"
    return
  fi
  if [[ "$state" == "DEGRADED" || "$state" == "WARNING" ]]; then
    echo "WARNING"
    return
  fi
  if [[ "$state" == "UNHEALTHY" || "$state" == "FAILED" || "$state" == "ERROR" ]]; then
    echo "CRITICAL"
    return
  fi

  # Any state not listed above.
  echo "UNKNOWN"
}

#######################################
# Translate a CES component state into a number usable as a metric value.
# Arguments: $1 - state string
# Outputs:   1 (HEALTHY), 0 (DISABLED), 0.5 (DEGRADED/WARNING),
#            -1 (UNHEALTHY/FAILED/ERROR), -2 for anything else
#######################################
get_perf_value() {
  local state=$1
  local mapping

  # Each entry is STATE=VALUE; the first entry whose STATE matches wins.
  for mapping in \
    "HEALTHY=1" "DISABLED=0" \
    "DEGRADED=0.5" "WARNING=0.5" \
    "UNHEALTHY=-1" "FAILED=-1" "ERROR=-1"; do
    if [[ "${mapping%%=*}" == "$state" ]]; then
      printf '%s\n' "${mapping#*=}"
      return
    fi
  done

  printf '%s\n' "-2"
}

# Emit one CheckMK local-check line per CES component.
# Line format: STATUS_CODE "SERVICE NAME" METRIC=VALUE STATUS_TEXT: details
# The metric is written in the third (metrics) column. A local-check line has
# no "| perfdata" suffix; text placed after a "|" is only shown as part of
# the status detail and is not recorded as a metric.

#######################################
# Print the local-check line for one CES component.
# Globals:   HOSTNAME (read) - appended to the service name
# Arguments: $1 - component name used in the service name (e.g. NFS)
#            $2 - metric name (e.g. nfs)
#            $3 - description used in the status detail (e.g. "NFS service")
#            $4 - state of the component
# Outputs:   one line on stdout
#######################################
emit_ces_service() {
  local component=$1
  local metric=$2
  local label=$3
  local state=$4
  local code text perf

  code=$(get_status_code "$state")
  text=$(get_status_text "$state")
  perf=$(get_perf_value "$state")

  printf '%s "GPFS CES STATE %s %s" %s=%s %s: %s is %s\n' \
    "$code" "$component" "$HOSTNAME" "$metric" "$perf" "$text" "$label" "$state"
}

emit_ces_service "AUTH" "auth" "Authentication service" "$AUTH"
emit_ces_service "BLOCK" "block" "Block service" "$BLOCK"
emit_ces_service "NETWORK" "network" "Network service" "$NETWORK"
emit_ces_service "HDFS_NAMENODE" "hdfs_namenode" "HDFS NameNode service" "$HDFS_NAMENODE"
emit_ces_service "AUTH_OBJ" "auth_obj" "Authentication Object service" "$AUTH_OBJ"
emit_ces_service "NFS" "nfs" "NFS service" "$NFS"
emit_ces_service "OBJ" "obj" "Object service" "$OBJ"
emit_ces_service "S3" "s3" "S3 service" "$S3"
emit_ces_service "SMB" "smb" "SMB service" "$SMB"
# Overall CES state of the node.
emit_ces_service "CES" "ces" "CES overall status" "$CES"