Checkmk/local checks/gpfs_filesystem

206 lines
7.3 KiB
Plaintext
Raw Normal View History

2026-01-14 07:28:32 +01:00
#!/bin/bash
#
# Checkmk local check for GPFS filesystem disk usage
# File: /usr/lib/check_mk_agent/local/gpfs_filesystem
# Make it executable: chmod +x /usr/lib/check_mk_agent/local/gpfs_filesystem
#
# Monitors the disk space usage of all GPFS filesystems (cesrz*, cesroot)
#

# Usage thresholds in percent (warning / critical)
WARN_THRESHOLD=80
CRIT_THRESHOLD=90

# Host name; it becomes part of every service name emitted by this check
HOSTNAME="$(hostname)"
# Convert a size string as printed by `df -h` (e.g. "4.5G", "20T", "512")
# into a byte count.
# Arguments: $1 - size string: digits, optionally a decimal fraction with
#                 "." or "," as separator, followed by a unit suffix
#                 K/M/G/T/P (either case, powers of 1024); or plain digits
#                 without a unit, which are returned as-is
# Outputs:   the size in bytes on stdout; "0" if the string cannot be parsed
# Note:      only the first decimal digit is used, further digits are dropped
convert_size_to_bytes() {
    local size=$1
    local whole tenths multiplier
    local unit_pattern='^([0-9]+)([.,]([0-9])[0-9]*)?([KkMmGgTtPp])$'

    # No unit suffix: the value is already in the base unit.
    # 10# forces base 10 so leading zeros are not read as octal.
    if [[ "$size" =~ ^[0-9]+$ ]]; then
        echo "$((10#$size))"
        return
    fi

    if ! [[ "$size" =~ $unit_pattern ]]; then
        echo "0"
        return
    fi
    whole=$((10#${BASH_REMATCH[1]}))
    tenths=${BASH_REMATCH[3]:-0}

    case ${BASH_REMATCH[4]} in
        K|k) multiplier=1024 ;;
        M|m) multiplier=$((1024 * 1024)) ;;
        G|g) multiplier=$((1024 * 1024 * 1024)) ;;
        T|t) multiplier=$((1024 * 1024 * 1024 * 1024)) ;;
        P|p) multiplier=$((1024 * 1024 * 1024 * 1024 * 1024)) ;;
    esac

    # Same value as (whole*10 + tenths) * multiplier / 10, but the whole part
    # is never multiplied by 10, so large petabyte values do not overflow.
    echo $((whole * multiplier + tenths * multiplier / 10))
}
# Compute used/total as a percentage with two decimal places (truncated).
# Arguments: $1 - used amount (non-negative integer)
#            $2 - total amount (non-negative integer)
# Outputs:   percentage such as "78.50" on stdout; "0.00" if total is zero
#            or either argument is not a non-negative integer
calculate_percentage() {
    local used=$1
    local total=$2
    local scaled remainder step

    if ! [[ "$used" =~ ^[0-9]+$ && "$total" =~ ^[0-9]+$ ]]; then
        echo "0.00"
        return
    fi
    # Force base 10 so leading zeros are not interpreted as octal.
    used=$((10#$used))
    total=$((10#$total))
    if (( total == 0 )); then
        echo "0.00"
        return
    fi

    # Long division, one decimal digit per step. After four steps scaled
    # equals floor(used * 10000 / total), but no intermediate value exceeds
    # 10 * total, so byte counts in the petabyte range do not overflow.
    scaled=$((used / total))
    remainder=$((used % total))
    for step in 1 2 3 4; do
        remainder=$((remainder * 10))
        scaled=$((scaled * 10 + remainder / total))
        remainder=$((remainder % total))
    done

    printf "%d.%02d" "$((scaled / 100))" "$((scaled % 100))"
}
# Determine the Checkmk status code for a usage percentage.
# Globals:   WARN_THRESHOLD, CRIT_THRESHOLD (read; whole percent values)
# Arguments: $1 - percentage, e.g. "78.50", "95" or "85.5"
# Outputs:   "2" at or above CRIT_THRESHOLD, "1" at or above WARN_THRESHOLD,
#            otherwise "0"; an unparseable argument is treated as 0 percent
get_status_code() {
    local percentage_str=$1
    local whole=0
    local fraction=""
    local pattern='^([0-9]+)(\.([0-9]*))?$'

    if [[ "$percentage_str" =~ $pattern ]]; then
        whole=${BASH_REMATCH[1]}
        fraction=${BASH_REMATCH[3]}
    fi

    # Normalise the fraction to exactly two digits so the value can be
    # compared in hundredths of a percent regardless of how many decimals
    # the caller supplied ("95" -> 9500, "85.5" -> 8550, "78.999" -> 7899).
    fraction="${fraction}00"
    fraction=${fraction:0:2}

    # 10# forces base 10 so values like "08" are not read as octal.
    local percentage_int=$((10#$whole * 100 + 10#$fraction))
    local warn_int=$((WARN_THRESHOLD * 100))
    local crit_int=$((CRIT_THRESHOLD * 100))

    if (( percentage_int >= crit_int )); then
        echo "2"
    elif (( percentage_int >= warn_int )); then
        echo "1"
    else
        echo "0"
    fi
}
# Map a Checkmk status code to its textual label.
# Arguments: $1 - status code
# Outputs:   "OK" for 0, "WARNING" for 1, "CRITICAL" for 2,
#            "UNKNOWN" for anything else
get_status_text() {
    local code=$1
    local label="UNKNOWN"

    if [[ "$code" == "0" ]]; then
        label="OK"
    elif [[ "$code" == "1" ]]; then
        label="WARNING"
    elif [[ "$code" == "2" ]]; then
        label="CRITICAL"
    fi

    echo "$label"
}
# Render a byte count in a human-readable form.
# Arguments: $1 - number of bytes
# Outputs:   the value divided (integer division, no rounding) by the
#            largest fitting power of 1024, with the suffix TB, GB, MB or
#            KB; values below 1024 are printed unchanged with suffix "B"
format_bytes() {
    local bytes=$1
    local entry divisor suffix

    # Walk the units from largest to smallest; the first one that fits wins.
    for entry in "1099511627776:TB" "1073741824:GB" "1048576:MB" "1024:KB"; do
        divisor=${entry%%:*}
        suffix=${entry##*:}
        if [ "$bytes" -ge "$divisor" ]; then
            echo "$((bytes / divisor))${suffix}"
            return
        fi
    done

    echo "${bytes}B"
}
# Check a single filesystem and print one Checkmk local-check line for it.
# Globals:   HOSTNAME, WARN_THRESHOLD, CRIT_THRESHOLD (read)
# Arguments: $1 - filesystem (device) name
#            $2 - total size as printed by `df -h` (e.g. "20T")
#            $3 - used size as printed by `df -h`
#            $4 - available size as printed by `df -h`
#            $5 - usage percentage as printed by df (e.g. "78%")
#            $6 - mount point
# Outputs:   one line "<status> "<service>" <metrics> <text>" on stdout.
#            If the total size cannot be parsed (or is zero), an UNKNOWN (3)
#            line with "-" in the metrics column is printed instead.
# Calls:     convert_size_to_bytes, get_status_code, get_status_text,
#            format_bytes
check_filesystem() {
    local filesystem=$1
    local size_str=$2
    local used_str=$3
    local avail_str=$4
    local use_percent_str=$5
    local mountpoint=$6

    # Build the service name
    local service_name="GPFS FILESYSTEM ${filesystem} ${HOSTNAME}"

    # Extract the percentage from the df column ("78%" -> "78.00").
    # A value that is not a plain integer is replaced by 0.
    local use_percent=${use_percent_str%'%'}
    if ! [[ "$use_percent" =~ ^[0-9]+$ ]]; then
        use_percent=0
    fi
    use_percent="${use_percent}.00"

    # Convert the human-readable sizes to bytes
    local size_bytes used_bytes avail_bytes
    size_bytes=$(convert_size_to_bytes "$size_str")
    used_bytes=$(convert_size_to_bytes "$used_str")
    avail_bytes=$(convert_size_to_bytes "$avail_str")

    # Without a usable total size nothing meaningful can be reported.
    # The "-" fills the metrics column, which the local check line format
    # requires even when there are no metrics.
    if [ "$size_bytes" -eq 0 ] 2>/dev/null || [ -z "$size_bytes" ]; then
        echo "3 \"$service_name\" - UNKNOWN: Could not parse filesystem size ($size_str)"
        return
    fi
    if ! [[ "$used_bytes" =~ ^[0-9]+$ ]]; then
        used_bytes=0
    fi
    if ! [[ "$avail_bytes" =~ ^[0-9]+$ ]]; then
        avail_bytes=0
    fi

    # Determine the status
    local status_code status_text
    status_code=$(get_status_code "$use_percent")
    status_text=$(get_status_text "$status_code")

    # Thresholds for the performance data, expressed in bytes
    local warn_bytes=$((size_bytes * WARN_THRESHOLD / 100))
    local crit_bytes=$((size_bytes * CRIT_THRESHOLD / 100))

    # Performance data: name=value;warn;crit;min;max, joined with "|"
    local perfdata="used_bytes=${used_bytes};${warn_bytes};${crit_bytes};0;${size_bytes}"
    perfdata="${perfdata}|avail_bytes=${avail_bytes};;;0;${size_bytes}"
    perfdata="${perfdata}|size_bytes=${size_bytes};;;0"
    perfdata="${perfdata}|usage_percent=${use_percent};${WARN_THRESHOLD};${CRIT_THRESHOLD};0;100"

    # The same values again in whole GiB (integer division)
    local used_gb=$((used_bytes / 1024 / 1024 / 1024))
    local size_gb=$((size_bytes / 1024 / 1024 / 1024))
    local avail_gb=$((avail_bytes / 1024 / 1024 / 1024))
    perfdata="${perfdata}|used_gb=${used_gb};;;0;${size_gb}"
    perfdata="${perfdata}|avail_gb=${avail_gb};;;0;${size_gb}"
    perfdata="${perfdata}|size_gb=${size_gb};;;0"

    # Human-readable details text
    local used_formatted size_formatted avail_formatted
    used_formatted=$(format_bytes "$used_bytes")
    size_formatted=$(format_bytes "$size_bytes")
    avail_formatted=$(format_bytes "$avail_bytes")
    local details="Usage: ${used_formatted}/${size_formatted} (${use_percent}%), Available: ${avail_formatted}, Mountpoint: ${mountpoint}"

    # Emit the Checkmk service line
    echo "$status_code \"$service_name\" $perfdata $status_text: $details"
}
# ============================================================================
# MAIN: find and check all GPFS filesystems
# ============================================================================
# Read the df listing once and check every GPFS filesystem (cesrz*, cesroot).
#  - LC_ALL=C keeps the number format independent of the system locale
#    (some locales print "4,5G" instead of "4.5G").
#  - -P selects the POSIX output format, which keeps each filesystem on a
#    single line even when the device name is long.
#  - Process substitution instead of a pipe keeps the loop in the current
#    shell, so gpfs_found is still set after the loop.
gpfs_found=0
while read -r filesystem size used avail use_percent mountpoint; do
    # Only GPFS-relevant filesystems (cesrz<number> or cesroot)
    if [[ "$filesystem" =~ ^cesrz[0-9]+$ || "$filesystem" == "cesroot" ]]; then
        gpfs_found=1
        check_filesystem "$filesystem" "$size" "$used" "$avail" "$use_percent" "$mountpoint"
    fi
done < <(LC_ALL=C df -hP)

# If no GPFS filesystem was checked, emit a single warning service instead.
# The "-" fills the metrics column required by the local check line format.
if (( gpfs_found == 0 )); then
    echo "1 \"GPFS FILESYSTEM GLOBAL $HOSTNAME\" - WARNING: No GPFS filesystems (cesrz*, cesroot) found"
fi