Checkmk/local checks/gpfs_node_state.sh
2026-01-14 07:28:32 +01:00

112 lines
3.9 KiB
Bash
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#
# Checkmk local check for IBM Spectrum Scale, built on "mmhealth node show".
# File:            /usr/lib/check_mk_agent/local/mmhealth_node
# Make executable: chmod +x /usr/lib/check_mk_agent/local/mmhealth_node
#
# Creates a separate service for each GPFS health component.
#
# Path to the mmhealth command
MMHEALTH_CMD="/usr/lpp/mmfs/bin/mmhealth"
HOSTNAME=$(hostname)

# Without an executable mmhealth binary nothing can be checked: report that
# as a state-2 service line and stop (the script itself still exits with 0).
if [[ ! -x "$MMHEALTH_CMD" ]]; then
    printf '%s\n' "2 \"GPFS HEALTH GLOBAL $HOSTNAME\" - CRITICAL: mmhealth command not found at $MMHEALTH_CMD"
    exit 0
fi

# Run "mmhealth node show"; stderr is folded into the captured output.
OUTPUT=$("$MMHEALTH_CMD" node show 2>&1)
EXIT_CODE=$?
if (( EXIT_CODE != 0 )); then
    printf '%s\n' "2 \"GPFS HEALTH GLOBAL $HOSTNAME\" - CRITICAL: mmhealth command failed with exit code $EXIT_CODE"
    exit 0
fi

# Pull the node state (third word of the "Node status:" line) and the text
# following the colon of the "Status Change:" line out of the output.
NODE_STATUS=$(awk '/Node status:/ {print $3}' <<< "$OUTPUT")
STATUS_CHANGE=$(sed -n '/Status Change:/{s/^[^:]*:[[:space:]]*//;p;}' <<< "$OUTPUT")

# No node state found means the output had an unexpected shape.
if [[ -z "$NODE_STATUS" ]]; then
    printf '%s\n' "2 \"GPFS HEALTH GLOBAL $HOSTNAME\" - CRITICAL: Could not parse node status from mmhealth output"
    exit 0
fi
#######################################
# Convert an mmhealth state name into a Checkmk state code.
# Arguments: $1 - state name (matched case-sensitively)
# Outputs:   0 (OK), 1 (WARNING), 2 (CRITICAL) or 3 (UNKNOWN) on stdout
#######################################
get_status_code() {
    local code
    case "$1" in
        HEALTHY)
            code=0
            ;;
        TIPS | DEGRADED | WARNING)
            # TIPS (improvement suggestions) is rated like a warning
            code=1
            ;;
        UNHEALTHY | FAILED | ERROR)
            code=2
            ;;
        *)
            # anything unrecognised, including an empty argument
            code=3
            ;;
    esac
    printf '%s\n' "$code"
}
#######################################
# Convert an mmhealth state name into a human-readable severity label.
# Arguments: $1 - state name (matched case-sensitively)
# Outputs:   OK, WARNING, CRITICAL or UNKNOWN on stdout
#######################################
get_status_text() {
    local label
    case "$1" in
        HEALTHY)
            label="OK"
            ;;
        TIPS | DEGRADED | WARNING)
            label="WARNING"
            ;;
        UNHEALTHY | FAILED | ERROR)
            label="CRITICAL"
            ;;
        *)
            # anything unrecognised, including an empty argument
            label="UNKNOWN"
            ;;
    esac
    printf '%s\n' "$label"
}
#######################################
# Convert an mmhealth state name into a numeric value for graphing.
# Arguments: $1 - state name (matched case-sensitively)
# Outputs:   1, 0.5, -1 or -2 on stdout
#######################################
get_perf_value() {
    local value
    case "$1" in
        HEALTHY)
            value="1"
            ;;
        TIPS | DEGRADED | WARNING)
            value="0.5"
            ;;
        UNHEALTHY | FAILED | ERROR)
            value="-1"
            ;;
        *)
            # anything unrecognised, including an empty argument
            value="-2"
            ;;
    esac
    printf '%s\n' "$value"
}
# Overall node status
STATUS_CODE=$(get_status_code "$NODE_STATUS")
STATUS_TEXT=$(get_status_text "$NODE_STATUS")
PERF_VALUE=$(get_perf_value "$NODE_STATUS")
# A Checkmk local check line has the form
#   <state> <service name> <metrics> <status detail>
# so the metric has to go into the third column. A trailing "| name=value"
# (Nagios plugin style) would only show up as literal text in the status detail.
echo "$STATUS_CODE \"GPFS HEALTH NODE $HOSTNAME\" node_status=$PERF_VALUE $STATUS_TEXT: Node status is $NODE_STATUS ($STATUS_CHANGE)"

#######################################
# Split one row of the mmhealth component table into its columns.
# The first two columns (component, status) are single words. The remaining
# text is split at the first run of two or more blanks into the status change
# and the reasons, because the status change has a varying number of words
# and the reasons may contain single blanks.
# Globals:   COMPONENT, COMP_STATUS, COMP_CHANGE, REASONS (written)
# Arguments: $1 - one row of the component table
#######################################
_parse_component_line() {
    local IFS=$' \t\n'
    local rest=""
    COMPONENT=""
    COMP_STATUS=""
    COMP_CHANGE=""
    REASONS=""
    # read trims surrounding whitespace; "rest" keeps its inner padding
    read -r COMPONENT COMP_STATUS rest <<< "$1"
    if [[ "$rest" == *"  "* ]]; then
        COMP_CHANGE=${rest%%"  "*}
        REASONS=${rest#*"  "}
        # drop whatever padding is left in front of the reasons
        REASONS=${REASONS#"${REASONS%%[![:space:]]*}"}
    else
        # no column gap found: treat everything as the status change
        COMP_CHANGE=$rest
    fi
}

# Extract the component rows: everything from the "Component" header line up
# to the next empty line, minus the header and the separator line below it.
COMPONENT_DATA=$(sed -n '/^Component/,/^$/p' <<< "$OUTPUT" | tail -n +3)

# Emit one service line per component
while IFS= read -r line; do
    # skip empty lines
    [[ -z "$line" ]] && continue
    # skip lines that consist only of separator dashes
    [[ "$line" =~ ^-+$ ]] && continue

    _parse_component_line "$line"

    # skip rows without a usable component name
    if [[ -z "$COMPONENT" || "$COMPONENT" == "-" ]]; then
        continue
    fi

    # state code, label and graph value for this component
    STATUS_CODE=$(get_status_code "$COMP_STATUS")
    STATUS_TEXT=$(get_status_text "$COMP_STATUS")
    PERF_VALUE=$(get_perf_value "$COMP_STATUS")

    # append the reasons only when mmhealth reported any ("-" means none)
    if [[ -n "$REASONS" && "$REASONS" != "-" ]]; then
        REASON_TEXT=" - $REASONS"
    else
        REASON_TEXT=""
    fi

    # service line for this component; the metric sits in the third column
    echo "$STATUS_CODE \"GPFS HEALTH $COMPONENT $HOSTNAME\" ${COMPONENT,,}_status=$PERF_VALUE $STATUS_TEXT: $COMPONENT is $COMP_STATUS ($COMP_CHANGE)$REASON_TEXT"
done <<< "$COMPONENT_DATA"