Checkmk/local checks/gpfs_ces_state.sh
2026-01-14 07:28:32 +01:00

135 lines
4.8 KiB
Bash
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#
# Checkmk local check for IBM Spectrum Scale CES (Cluster Export Services)
# File: /usr/lib/check_mk_agent/local/mmces_state
# Make executable: chmod +x /usr/lib/check_mk_agent/local/mmces_state
#
# Creates a separate service for each CES component.
#
# Path to the mmces command
MMCES_CMD="/usr/lpp/mmfs/bin/mmces"
HOSTNAME=$(hostname)
# Check that mmces is available. Problems are reported as a CRITICAL service
# line and the script still exits 0.
if [ ! -x "$MMCES_CMD" ]; then
    echo "2 MMCES_Global - CRITICAL: mmces command not found at $MMCES_CMD"
    exit 0
fi
# Run "mmces state show" for this node; stderr is merged into OUTPUT.
OUTPUT=$("$MMCES_CMD" state show -N "$HOSTNAME" 2>&1)
EXIT_CODE=$?
if [ "$EXIT_CODE" -ne 0 ]; then
    echo "2 MMCES_Global - CRITICAL: mmces command failed with exit code $EXIT_CODE"
    exit 0
fi
# Locate the header line (starts with NODE) and the first data line.
# Separator lines (starting with "-") and blank/whitespace-only lines are
# skipped so they cannot be mistaken for the data line.
HEADER_LINE=""
DATA_LINE=""
while IFS= read -r OUTPUT_LINE; do
    case "$OUTPUT_LINE" in
        NODE*)
            if [ -z "$HEADER_LINE" ]; then
                HEADER_LINE=$OUTPUT_LINE
            fi
            ;;
        -*) ;;
        *[![:space:]]*)
            DATA_LINE=$OUTPUT_LINE
            break
            ;;
    esac
done <<< "$OUTPUT"
if [ -z "$DATA_LINE" ]; then
    echo "2 MMCES_Global - CRITICAL: No data found in mmces output"
    exit 0
fi
# Extract the fields (whitespace-separated).
# When a header line is present, each value is assigned by its column name
# rather than by position, so a different column set or order in the mmces
# output cannot shift states onto the wrong service. Columns that are not
# present leave their variable empty.
NODE="" AUTH="" BLOCK="" NETWORK="" HDFS_NAMENODE="" AUTH_OBJ=""
NFS="" OBJ="" S3="" SMB="" CES=""
if [ -n "$HEADER_LINE" ]; then
    read -r -a COLUMN_NAMES <<< "$HEADER_LINE"
    read -r -a COLUMN_VALUES <<< "$DATA_LINE"
    for COLUMN_INDEX in "${!COLUMN_NAMES[@]}"; do
        # Only assign to the known variables; never let command output
        # choose an arbitrary variable name.
        case "${COLUMN_NAMES[$COLUMN_INDEX]}" in
            NODE|AUTH|BLOCK|NETWORK|HDFS_NAMENODE|AUTH_OBJ|NFS|OBJ|S3|SMB|CES)
                printf -v "${COLUMN_NAMES[$COLUMN_INDEX]}" '%s' \
                    "${COLUMN_VALUES[$COLUMN_INDEX]}"
                ;;
        esac
    done
else
    # No header found: fall back to the fixed column order.
    read -r NODE AUTH BLOCK NETWORK HDFS_NAMENODE AUTH_OBJ NFS OBJ S3 SMB CES <<< "$DATA_LINE"
fi
# Convert a component state into a Checkmk status code.
# Arguments: $1 - state string (e.g. HEALTHY)
# Outputs:   0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN) on stdout
get_status_code() {
    local state=$1
    local code=3    # any unrecognised state is UNKNOWN
    case "$state" in
        HEALTHY | DISABLED)
            code=0  # DISABLED is treated as a normal condition
            ;;
        DEGRADED | WARNING)
            code=1
            ;;
        UNHEALTHY | FAILED | ERROR)
            code=2
            ;;
    esac
    printf '%s\n' "$code"
}
# Convert a component state into the status label shown in the check output.
# Arguments: $1 - state string (e.g. HEALTHY)
# Outputs:   "OK", "OK (Disabled)", "WARNING", "CRITICAL" or "UNKNOWN" on stdout
get_status_text() {
    local state=$1
    if [[ $state == "HEALTHY" ]]; then
        printf '%s\n' "OK"
    elif [[ $state == "DISABLED" ]]; then
        printf '%s\n' "OK (Disabled)"
    elif [[ $state == "DEGRADED" || $state == "WARNING" ]]; then
        printf '%s\n' "WARNING"
    elif [[ $state == "UNHEALTHY" || $state == "FAILED" || $state == "ERROR" ]]; then
        printf '%s\n' "CRITICAL"
    else
        # Any state not listed above
        printf '%s\n' "UNKNOWN"
    fi
}
# Convert a component state into a numeric value for performance data
# (usable for graphing).
# Arguments: $1 - state string (e.g. HEALTHY)
# Outputs:   1, 0, 0.5, -1 or -2 on stdout
get_perf_value() {
    local numeric
    case "${1}" in
        HEALTHY)                    numeric="1" ;;
        DISABLED)                   numeric="0" ;;
        DEGRADED | WARNING)         numeric="0.5" ;;
        UNHEALTHY | FAILED | ERROR) numeric="-1" ;;
        *)                          numeric="-2" ;;  # unrecognised state
    esac
    printf '%s\n' "$numeric"
}
# Create a separate Checkmk service for each CES component.
# Checkmk local check line format: STATUS "SERVICE NAME" METRICS DETAILS
# The metric belongs in the third field; a Nagios-style trailing
# "| name=value" would only be shown as part of the details text.
#
# Print one local-check line for a single CES component.
# Globals:   HOSTNAME (read) - appended to the service name
# Arguments: $1 - component label used in the service name
#            $2 - metric name
#            $3 - subject of the details sentence
#            $4 - component state
# Outputs:   one service line on stdout
emit_ces_service() {
    local label=$1 metric=$2 subject=$3 state=$4
    local status_code status_text perf_value
    status_code=$(get_status_code "$state")
    status_text=$(get_status_text "$state")
    perf_value=$(get_perf_value "$state")
    # An empty state is printed as "not reported" so the details sentence
    # does not end in a dangling "is ".
    printf '%s "GPFS CES STATE %s %s" %s=%s %s: %s is %s\n' \
        "$status_code" "$label" "$HOSTNAME" "$metric" "$perf_value" \
        "$status_text" "$subject" "${state:-not reported}"
}
emit_ces_service "AUTH" "auth" "Authentication service" "$AUTH"
emit_ces_service "BLOCK" "block" "Block service" "$BLOCK"
emit_ces_service "NETWORK" "network" "Network service" "$NETWORK"
emit_ces_service "HDFS_NAMENODE" "hdfs_namenode" "HDFS NameNode service" "$HDFS_NAMENODE"
emit_ces_service "AUTH_OBJ" "auth_obj" "Authentication Object service" "$AUTH_OBJ"
emit_ces_service "NFS" "nfs" "NFS service" "$NFS"
emit_ces_service "OBJ" "obj" "Object service" "$OBJ"
emit_ces_service "S3" "s3" "S3 service" "$S3"
emit_ces_service "SMB" "smb" "SMB service" "$SMB"
# CES is the overall status
emit_ces_service "CES" "ces" "CES overall status" "$CES"