From 44298c47b0d7e4e170b2168ddca592d50800c8d2 Mon Sep 17 00:00:00 2001 From: magadimn Date: Wed, 14 Jan 2026 07:28:32 +0100 Subject: [PATCH] add files --- local checks/gpfs_ces_state.sh | 134 ++++++++++++ local checks/gpfs_fileset_inodes | 289 +++++++++++++++++++++++++ local checks/gpfs_filesystem | 206 ++++++++++++++++++ local checks/gpfs_node_state.sh | 112 ++++++++++ local checks/ms_storagereplication.ps1 | 14 ++ local checks/sssl check | 149 +++++++++++++ 6 files changed, 904 insertions(+) create mode 100644 local checks/gpfs_ces_state.sh create mode 100644 local checks/gpfs_fileset_inodes create mode 100644 local checks/gpfs_filesystem create mode 100644 local checks/gpfs_node_state.sh create mode 100644 local checks/ms_storagereplication.ps1 create mode 100644 local checks/sssl check diff --git a/local checks/gpfs_ces_state.sh b/local checks/gpfs_ces_state.sh new file mode 100644 index 0000000..4492e3b --- /dev/null +++ b/local checks/gpfs_ces_state.sh @@ -0,0 +1,134 @@ +#!/bin/bash +# +# CheckMK Local Check für IBM Spectrum Scale CES (Cluster Export Services) +# Datei: /usr/lib/check_mk_agent/local/mmces_state +# Ausführbar machen: chmod +x /usr/lib/check_mk_agent/local/mmces_state +# +# Erstellt separate Services für jeden CES-Dienst +# + +# Pfad zum mmces Kommando +MMCES_CMD="/usr/lpp/mmfs/bin/mmces" +HOSTNAME=$(hostname) + +# Prüfen ob mmces verfügbar ist +if [ ! -x "$MMCES_CMD" ]; then +    echo "2 MMCES_Global - CRITICAL: mmces command not found at $MMCES_CMD" +    exit 0 +fi + +# mmces state show ausführen und Ausgabe parsen +OUTPUT=$($MMCES_CMD state show -N "$HOSTNAME" 2>&1) +EXIT_CODE=$? 
+ +if [ $EXIT_CODE -ne 0 ]; then +    echo "2 MMCES_Global - CRITICAL: mmces command failed with exit code $EXIT_CODE" +    exit 0 +fi + +# Header-Zeile überspringen und Datenzeile extrahieren +DATA_LINE=$(echo "$OUTPUT" | grep -v "^NODE" | grep -v "^-" | head -1) + +if [ -z "$DATA_LINE" ]; then +    echo "2 MMCES_Global - CRITICAL: No data found in mmces output" +    exit 0 +fi + +# Felder extrahieren (whitespace-separated) +read -r NODE AUTH BLOCK NETWORK HDFS_NAMENODE AUTH_OBJ NFS OBJ S3 SMB CES <<< "$DATA_LINE" + +# Funktion zur Status-Konvertierung in CheckMK-Codes +get_status_code() { +    case "$1" in +        "HEALTHY") echo "0" ;;           # OK +        "DISABLED") echo "0" ;;          # OK (DISABLED ist normal) +        "DEGRADED"|"WARNING") echo "1" ;; # WARNING +        "UNHEALTHY"|"FAILED"|"ERROR") echo "2" ;; # CRITICAL +        *) echo "3" ;;                   # UNKNOWN +    esac +} + +# Funktion zur Status-Beschreibung +get_status_text() { +    case "$1" in +        "HEALTHY") echo "OK" ;; +        "DISABLED") echo "OK (Disabled)" ;; +        "DEGRADED"|"WARNING") echo "WARNING" ;; +        "UNHEALTHY"|"FAILED"|"ERROR") echo "CRITICAL" ;; +        *) echo "UNKNOWN" ;; +    esac +} + +# Funktion zur Performance-Daten (numerisch für Graphing) +get_perf_value() { +    case "$1" in +        "HEALTHY") echo "1" ;; +        "DISABLED") echo "0" ;; +        "DEGRADED"|"WARNING") echo "0.5" ;; +        "UNHEALTHY"|"FAILED"|"ERROR") echo "-1" ;; +        *) echo "-2" ;; +    esac +} + +# Separate Services für jeden CES-Dienst erstellen +# Format: STATUS_CODE SERVICENAME - STATUS_TEXT: Details | performance_data + +# AUTH Service +STATUS_CODE=$(get_status_code "$AUTH") +STATUS_TEXT=$(get_status_text "$AUTH") +PERF_VALUE=$(get_perf_value "$AUTH") +echo "$STATUS_CODE \"GPFS CES STATE AUTH $HOSTNAME\" - $STATUS_TEXT: Authentication service is $AUTH | auth=$PERF_VALUE" + +# BLOCK Service +STATUS_CODE=$(get_status_code "$BLOCK") +STATUS_TEXT=$(get_status_text 
"$BLOCK") +PERF_VALUE=$(get_perf_value "$BLOCK") +echo "$STATUS_CODE \"GPFS CES STATE BLOCK $HOSTNAME\" - $STATUS_TEXT: Block service is $BLOCK | block=$PERF_VALUE" + +# NETWORK Service +STATUS_CODE=$(get_status_code "$NETWORK") +STATUS_TEXT=$(get_status_text "$NETWORK") +PERF_VALUE=$(get_perf_value "$NETWORK") +echo "$STATUS_CODE \"GPFS CES STATE NETWORK $HOSTNAME\" - $STATUS_TEXT: Network service is $NETWORK | network=$PERF_VALUE" + +# HDFS_NAMENODE Service +STATUS_CODE=$(get_status_code "$HDFS_NAMENODE") +STATUS_TEXT=$(get_status_text "$HDFS_NAMENODE") +PERF_VALUE=$(get_perf_value "$HDFS_NAMENODE") +echo "$STATUS_CODE \"GPFS CES STATE HDFS_NAMENODE $HOSTNAME\" - $STATUS_TEXT: HDFS NameNode service is $HDFS_NAMENODE | hdfs_namenode=$PERF_VALUE" + +# AUTH_OBJ Service +STATUS_CODE=$(get_status_code "$AUTH_OBJ") +STATUS_TEXT=$(get_status_text "$AUTH_OBJ") +PERF_VALUE=$(get_perf_value "$AUTH_OBJ") +echo "$STATUS_CODE \"GPFS CES STATE AUTH_OBJ $HOSTNAME\" - $STATUS_TEXT: Authentication Object service is $AUTH_OBJ | auth_obj=$PERF_VALUE" + +# NFS Service +STATUS_CODE=$(get_status_code "$NFS") +STATUS_TEXT=$(get_status_text "$NFS") +PERF_VALUE=$(get_perf_value "$NFS") +echo "$STATUS_CODE \"GPFS CES STATE NFS $HOSTNAME\" - $STATUS_TEXT: NFS service is $NFS | nfs=$PERF_VALUE" + +# OBJ Service +STATUS_CODE=$(get_status_code "$OBJ") +STATUS_TEXT=$(get_status_text "$OBJ") +PERF_VALUE=$(get_perf_value "$OBJ") +echo "$STATUS_CODE \"GPFS CES STATE OBJ $HOSTNAME\" - $STATUS_TEXT: Object service is $OBJ | obj=$PERF_VALUE" + +# S3 Service +STATUS_CODE=$(get_status_code "$S3") +STATUS_TEXT=$(get_status_text "$S3") +PERF_VALUE=$(get_perf_value "$S3") +echo "$STATUS_CODE \"GPFS CES STATE S3 $HOSTNAME\" - $STATUS_TEXT: S3 service is $S3 | s3=$PERF_VALUE" + +# SMB Service +STATUS_CODE=$(get_status_code "$SMB") +STATUS_TEXT=$(get_status_text "$SMB") +PERF_VALUE=$(get_perf_value "$SMB") +echo "$STATUS_CODE \"GPFS CES STATE SMB $HOSTNAME\" - $STATUS_TEXT: SMB service is $SMB | 
# Perform an authenticated GET request against the GPFS management REST API.
#
# Globals (read):
#   USER, PASSWORD            - API credentials set by the calling script.
#   GPFS_API_CONNECT_TIMEOUT  - optional connect timeout in seconds (default 5).
#   GPFS_API_MAX_TIME         - optional total timeout in seconds (default 30).
# Arguments:
#   $1 - full URL to request
# Outputs:
#   The response body on stdout. Nothing is printed for HTTP status >= 400,
#   so callers that test for an empty response detect API errors.
# Returns:
#   curl's exit status (non-zero on transport errors, timeouts, HTTP errors).
#
# NOTE(review): the credentials should come from a root-only file or the
# environment rather than being hard-coded in the script that defines them.
gpfs_api_call() {
    local url="$1"
    local credentials="${USER}:${PASSWORD}"

    # Escape backslashes and double quotes for curl's config-file syntax.
    credentials=${credentials//\\/\\\\}
    credentials=${credentials//\"/\\\"}

    # The credentials are fed through a config "file" on stdin so that they
    # never appear in curl's argument list (readable by any user via ps).
    # --fail turns HTTP errors into a non-zero exit with an empty body instead
    # of handing an error document to the JSON parser; the timeouts keep a
    # hanging API from blocking the whole agent run.
    printf 'user = "%s"\n' "$credentials" \
        | curl --silent --fail --config - \
               --connect-timeout "${GPFS_API_CONNECT_TIMEOUT:-5}" \
               --max-time "${GPFS_API_MAX_TIME:-30}" \
               -H "Accept: application/json" \
               -H "Content-Type: application/json" \
               -X GET "${url}" 2>/dev/null
}
# Emit one Checkmk local-check line describing the inode usage of a fileset.
#
# Globals (read): BASE_URL, HOSTNAME, WARN_THRESHOLD, CRIT_THRESHOLD
# Calls:          gpfs_api_call, calculate_percentage, get_status_code,
#                 get_status_text, jq
# Arguments:
#   $1 - filesystem name
#   $2 - fileset name
# Outputs:
#   Exactly one line on stdout: <state> "<service>" <metrics> <text>
check_fileset() {
    local filesystem=$1
    local fileset=$2
    local service_name="GPFS FILESET INODES ${filesystem}_${fileset} ${HOSTNAME}"

    # Declaration and assignment are separate so that the exit status of the
    # API call is not replaced by the (always successful) status of `local`.
    local detail_response
    if ! detail_response=$(gpfs_api_call "${BASE_URL}/${filesystem}/filesets/${fileset}") \
        || [ -z "$detail_response" ]; then
        echo "2 \"$service_name\" - CRITICAL: API call failed for fileset $fileset in $filesystem"
        return
    fi

    # One jq run extracts every field; for each value the nested
    # ".filesets[0]" layout is tried first, then the flat layout, then 0.
    local fields
    fields=$(printf '%s' "$detail_response" | jq -r '
        [ (.filesets[0]?.usage?.usedInodes // .usage?.usedInodes //
           .filesets[0]?.usage?.inodeSpaceUsedInodes // .usage?.inodeSpaceUsedInodes // 0),
          (.filesets[0]?.usage?.allocatedInodes // .usage?.allocatedInodes // 0),
          (.filesets[0]?.config?.maxNumInodes // .config?.maxNumInodes // 0),
          (.filesets[0]?.usage?.inodeSpaceFreeInodes // .usage?.inodeSpaceFreeInodes // 0),
          (.filesets[0]?.usage?.usedBytes // .usage?.usedBytes // 0),
          (.filesets[0]?.usage?.allocatedBytes // .usage?.allocatedBytes // 0),
          (.filesets[0]?.config?.maxBytes // .config?.maxBytes // 0)
        ] | map(tostring) | join(" ")' 2>/dev/null)

    local used_inodes allocated_inodes max_inodes free_inodes
    local used_bytes allocated_bytes max_bytes
    read -r used_inodes allocated_inodes max_inodes free_inodes \
        used_bytes allocated_bytes max_bytes <<< "$fields"

    # Optional values that are missing or not plain integers count as 0 so the
    # integer tests and arithmetic below cannot fail on them.
    local field
    for field in allocated_inodes free_inodes used_bytes allocated_bytes max_bytes; do
        [[ "${!field}" =~ ^[0-9]+$ ]] || printf -v "$field" '%s' 0
    done

    # Without a configured maximum, fall back to the allocated inode count.
    if [ "$max_inodes" = "0" ] || [ "$max_inodes" = "null" ]; then
        max_inodes="$allocated_inodes"
    fi

    if ! [[ "$used_inodes" =~ ^[0-9]+$ ]] || ! [[ "$max_inodes" =~ ^[0-9]+$ ]]; then
        echo "3 \"$service_name\" - UNKNOWN: Invalid inode data (used: $used_inodes, max: $max_inodes)"
        return
    fi

    if [ "$max_inodes" -eq 0 ]; then
        echo "0 \"$service_name\" used_inodes=$used_inodes;;;0 OK: No inode limit set for fileset"
        return
    fi

    local percentage status_code status_text
    percentage=$(calculate_percentage "$used_inodes" "$max_inodes")
    status_code=$(get_status_code "$percentage")
    status_text=$(get_status_text "$status_code")

    # Derive the free inode count when the API did not report one; clamp at 0
    # because "used" can exceed the fallback maximum.
    if [ "$free_inodes" -eq 0 ]; then
        free_inodes=$(( max_inodes > used_inodes ? max_inodes - used_inodes : 0 ))
    fi

    # Absolute thresholds for the metric (integer division, as before).
    local warn_absolute=$(( max_inodes * WARN_THRESHOLD / 100 ))
    local crit_absolute=$(( max_inodes * CRIT_THRESHOLD / 100 ))

    local free_bytes=0
    if [ "$allocated_bytes" -gt 0 ] && [ "$used_bytes" -gt 0 ]; then
        free_bytes=$(( allocated_bytes - used_bytes ))
    fi

    local bytes_percentage=""
    if [ "$max_bytes" -gt 0 ] && [ "$used_bytes" -gt 0 ]; then
        bytes_percentage=$(calculate_percentage "$used_bytes" "$max_bytes")
    fi

    # Metrics column: name=value;warn;crit;min;max, joined with "|".
    local perfdata="used_inodes=${used_inodes};${warn_absolute};${crit_absolute};0;${max_inodes}"
    perfdata="${perfdata}|free_inodes=${free_inodes};;;0;${max_inodes}"
    perfdata="${perfdata}|allocated_inodes=${allocated_inodes};;;0"
    perfdata="${perfdata}|max_inodes=${max_inodes};;;0"
    perfdata="${perfdata}|usage_percent=${percentage};${WARN_THRESHOLD};${CRIT_THRESHOLD};0;100"

    if [ "$used_bytes" -gt 0 ]; then
        perfdata="${perfdata}|used_bytes=${used_bytes};;;0"
    fi
    if [ "$allocated_bytes" -gt 0 ]; then
        perfdata="${perfdata}|allocated_bytes=${allocated_bytes};;;0"
        if [ "$free_bytes" -gt 0 ]; then
            perfdata="${perfdata}|free_bytes=${free_bytes};;;0"
        fi
    fi
    if [ "$max_bytes" -gt 0 ]; then
        perfdata="${perfdata}|max_bytes=${max_bytes};;;0"
        if [ -n "$bytes_percentage" ]; then
            perfdata="${perfdata}|bytes_usage_percent=${bytes_percentage};;;0;100"
        fi
    fi

    # Human-readable summary.
    local details="Inodes: ${used_inodes}/${max_inodes} (${percentage}%)"
    if [ "$allocated_inodes" != "$max_inodes" ] && [ "$allocated_inodes" -gt 0 ]; then
        details="${details}, allocated: ${allocated_inodes}"
    fi
    if [ "$used_bytes" -gt 0 ]; then
        details="${details}, Size: $(( used_bytes / 1024 / 1024 ))MB"
        if [ "$allocated_bytes" -gt 0 ]; then
            details="${details}/$(( allocated_bytes / 1024 / 1024 ))MB"
        fi
        if [ "$max_bytes" -gt 0 ]; then
            details="${details} (limit: $(( max_bytes / 1024 / 1024 ))MB, ${bytes_percentage}%)"
        fi
    fi

    echo "$status_code \"$service_name\" $perfdata $status_text: $details"
}
# Convert a human-readable size as printed by `df -h` into bytes.
#
# Accepted forms: a plain integer (returned as-is, normalised to base 10) or
# a number with an optional single decimal place followed by one of the
# binary unit letters K, M, G, T, P (either case). Both "." and "," are
# accepted as decimal separator because the separator depends on the locale.
# Only the first decimal digit is honoured; further digits are dropped.
#
# Arguments:
#   $1 - size string, e.g. "20G", "4.5T", "4,5T", "512"
# Outputs:
#   Size in bytes on stdout; "0" when the input cannot be parsed.
convert_size_to_bytes() {
    local size=$1
    local unit_re='^([0-9]+)([.,]([0-9]+))?([KkMmGgTtPp])$'
    local integer_part decimal_digit multiplier

    # No unit letter: the value is already expressed in the base unit.
    if [[ "$size" =~ ^[0-9]+$ ]]; then
        # 10# forces base 10 so leading zeros are not read as octal.
        echo "$((10#$size))"
        return
    fi

    if ! [[ "$size" =~ $unit_re ]]; then
        echo "0"
        return
    fi

    integer_part=$((10#${BASH_REMATCH[1]}))
    decimal_digit=${BASH_REMATCH[3]:0:1}
    decimal_digit=${decimal_digit:-0}

    case "${BASH_REMATCH[4]}" in
        K|k) multiplier=$((1024)) ;;
        M|m) multiplier=$((1024 * 1024)) ;;
        G|g) multiplier=$((1024 * 1024 * 1024)) ;;
        T|t) multiplier=$((1024 * 1024 * 1024 * 1024)) ;;
        P|p) multiplier=$((1024 * 1024 * 1024 * 1024 * 1024)) ;;
    esac

    # Same result as (integer*10 + digit) * multiplier / 10, but the integer
    # part is never scaled by ten, which keeps large values inside the
    # 64-bit range of shell arithmetic.
    echo $(( integer_part * multiplier + decimal_digit * multiplier / 10 ))
}
# Emit one Checkmk local-check line describing the usage of a filesystem.
#
# Globals (read): HOSTNAME, WARN_THRESHOLD, CRIT_THRESHOLD
# Calls:          convert_size_to_bytes, get_status_code, get_status_text,
#                 format_bytes
# Arguments (the columns of one `df -h` row):
#   $1 - filesystem name      $4 - available space
#   $2 - total size           $5 - use percentage, e.g. "78%"
#   $3 - used space           $6 - mount point
# Outputs:
#   Exactly one line on stdout: <state> "<service>" <metrics> <text>
check_filesystem() {
    local filesystem=$1
    local size_str=$2
    local used_str=$3
    local avail_str=$4
    local use_percent_str=$5
    local mountpoint=$6

    local service_name="GPFS FILESYSTEM ${filesystem} ${HOSTNAME}"

    local size_bytes used_bytes avail_bytes
    size_bytes=$(convert_size_to_bytes "$size_str")
    used_bytes=$(convert_size_to_bytes "$used_str")
    avail_bytes=$(convert_size_to_bytes "$avail_str")

    # Without a usable total size nothing meaningful can be reported. The "-"
    # is the empty metrics column; without it the word after the service name
    # would be taken for metrics.
    if ! [[ "$size_bytes" =~ ^[0-9]+$ ]] || [ "$size_bytes" -eq 0 ]; then
        echo "3 \"$service_name\" - UNKNOWN: Could not parse filesystem size ($size_str)"
        return
    fi
    [[ "$used_bytes" =~ ^[0-9]+$ ]] || used_bytes=0
    [[ "$avail_bytes" =~ ^[0-9]+$ ]] || avail_bytes=0

    # Prefer the percentage df printed; if that column is not a number,
    # derive it from the byte values instead of silently reporting 0 %.
    local use_percent=${use_percent_str%\%}
    if ! [[ "$use_percent" =~ ^[0-9]+$ ]]; then
        use_percent=$(( used_bytes * 100 / size_bytes ))
    fi
    use_percent="${use_percent}.00"   # helpers expect two decimal places

    local status_code status_text
    status_code=$(get_status_code "$use_percent")
    status_text=$(get_status_text "$status_code")

    # Absolute thresholds (bytes) for the used_bytes metric.
    local warn_bytes=$(( size_bytes * WARN_THRESHOLD / 100 ))
    local crit_bytes=$(( size_bytes * CRIT_THRESHOLD / 100 ))

    local gib=$(( 1024 * 1024 * 1024 ))
    local used_gb=$(( used_bytes / gib ))
    local size_gb=$(( size_bytes / gib ))
    local avail_gb=$(( avail_bytes / gib ))

    # Metrics column: name=value;warn;crit;min;max, joined with "|".
    local perfdata="used_bytes=${used_bytes};${warn_bytes};${crit_bytes};0;${size_bytes}"
    perfdata="${perfdata}|avail_bytes=${avail_bytes};;;0;${size_bytes}"
    perfdata="${perfdata}|size_bytes=${size_bytes};;;0"
    perfdata="${perfdata}|usage_percent=${use_percent};${WARN_THRESHOLD};${CRIT_THRESHOLD};0;100"
    perfdata="${perfdata}|used_gb=${used_gb};;;0;${size_gb}"
    perfdata="${perfdata}|avail_gb=${avail_gb};;;0;${size_gb}"
    perfdata="${perfdata}|size_gb=${size_gb};;;0"

    local used_formatted size_formatted avail_formatted
    used_formatted=$(format_bytes "$used_bytes")
    size_formatted=$(format_bytes "$size_bytes")
    avail_formatted=$(format_bytes "$avail_bytes")

    local details="Usage: ${used_formatted}/${size_formatted} (${use_percent}%), Available: ${avail_formatted}, Mountpoint: ${mountpoint}"

    echo "$status_code \"$service_name\" $perfdata $status_text: $details"
}
-x "$MMHEALTH_CMD" ]; then +    echo "2 \"GPFS HEALTH GLOBAL $HOSTNAME\" - CRITICAL: mmhealth command not found at $MMHEALTH_CMD" +    exit 0 +fi + +# mmhealth node show ausführen und Ausgabe parsen +OUTPUT=$($MMHEALTH_CMD node show 2>&1) +EXIT_CODE=$? + +if [ $EXIT_CODE -ne 0 ]; then +    echo "2 \"GPFS HEALTH GLOBAL $HOSTNAME\" - CRITICAL: mmhealth command failed with exit code $EXIT_CODE" +    exit 0 +fi + +# Node Status extrahieren +NODE_STATUS=$(echo "$OUTPUT" | grep "Node status:" | awk '{print $3}') +STATUS_CHANGE=$(echo "$OUTPUT" | grep "Status Change:" | cut -d':' -f2- | sed 's/^[[:space:]]*//') + +if [ -z "$NODE_STATUS" ]; then +    echo "2 \"GPFS HEALTH GLOBAL $HOSTNAME\" - CRITICAL: Could not parse node status from mmhealth output" +    exit 0 +fi + +# Funktion zur Status-Konvertierung in CheckMK-Codes +get_status_code() { +    case "$1" in +        "HEALTHY") echo "0" ;;           # OK +        "TIPS") echo "1" ;;              # WARNING - TIPS bedeutet Verbesserungsvorschläge +        "DEGRADED"|"WARNING") echo "1" ;; # WARNING +        "UNHEALTHY"|"FAILED"|"ERROR") echo "2" ;; # CRITICAL +        *) echo "3" ;;                   # UNKNOWN +    esac +} + +# Funktion zur Status-Beschreibung +get_status_text() { +    case "$1" in +        "HEALTHY") echo "OK" ;; +        "TIPS") echo "WARNING" ;; +        "DEGRADED"|"WARNING") echo "WARNING" ;; +        "UNHEALTHY"|"FAILED"|"ERROR") echo "CRITICAL" ;; +        *) echo "UNKNOWN" ;; +    esac +} + +# Funktion zur Performance-Daten (numerisch für Graphing) +get_perf_value() { +    case "$1" in +        "HEALTHY") echo "1" ;; +        "TIPS") echo "0.5" ;; +        "DEGRADED"|"WARNING") echo "0.5" ;; +        "UNHEALTHY"|"FAILED"|"ERROR") echo "-1" ;; +        *) echo "-2" ;; +    esac +} + +# Globaler Node Status +STATUS_CODE=$(get_status_code "$NODE_STATUS") +STATUS_TEXT=$(get_status_text "$NODE_STATUS") +PERF_VALUE=$(get_perf_value "$NODE_STATUS") +echo "$STATUS_CODE \"GPFS HEALTH NODE $HOSTNAME\" - 
# Print one Checkmk local-check line per component row of the table printed
# by `mmhealth node show`.
#
# The table is taken to start at the line beginning with "Component" and to
# end at the next empty line; lines made only of dashes are separators.
# Each row is read as: component, status, status change, reasons.
# NOTE(review): columns are assumed to be padded with at least two spaces,
# which is what lets a multi-word change such as "2 days ago" be told apart
# from the reasons column - confirm against real mmhealth output.
#
# Globals (read): HOSTNAME
# Calls:          get_status_code, get_status_text, get_perf_value
# Arguments:
#   $1 - complete output of `mmhealth node show`
# Outputs:
#   One line per component: <state> "<service>" <metric> <text>
emit_component_services() {
    local output=$1
    local line component comp_status rest comp_change reasons reason_text
    local status_code status_text perf_value
    local in_table=0
    # Fallback for rows whose last two columns are split by one space only.
    local single_space_re='^(Now|[0-9]+ [^ ]+ ago) (.+)$'

    while IFS= read -r line; do
        # Skip everything up to and including the table header.
        if (( ! in_table )); then
            [[ "$line" == Component* ]] && in_table=1
            continue
        fi

        # An empty line ends the table; dash-only lines are separators.
        [ -z "$line" ] && break
        [[ "$line" =~ ^-+$ ]] && continue

        # The first two columns are single words; the remainder holds the
        # status change followed by the reasons.
        read -r component comp_status rest <<< "$line"
        [ -z "$component" ] || [ "$component" = "-" ] && continue

        # Split the remainder at the first run of two or more spaces.
        comp_change=${rest%%  *}
        reasons=${rest#"$comp_change"}
        reasons=${reasons#"${reasons%%[![:space:]]*}"}
        if [ -z "$reasons" ] && [[ "$rest" =~ $single_space_re ]]; then
            comp_change=${BASH_REMATCH[1]}
            reasons=${BASH_REMATCH[2]}
        fi

        status_code=$(get_status_code "$comp_status")
        status_text=$(get_status_text "$comp_status")
        perf_value=$(get_perf_value "$comp_status")

        if [ -n "$reasons" ] && [ "$reasons" != "-" ]; then
            reason_text=" - $reasons"
        else
            reason_text=""
        fi

        # The metric belongs in the third column of a local-check line; text
        # appended after a "|" is shown verbatim and never stored as a metric.
        echo "$status_code \"GPFS HEALTH $component $HOSTNAME\" ${component,,}_status=$perf_value $status_text: $component is $comp_status ($comp_change)$reason_text"
    done <<< "$output"
}

# One service per component found in the captured mmhealth output.
emit_component_services "$OUTPUT"
# Checkmk local check for the Microsoft Storage Replica replication status.
#
# Every replication group returned by Get-SRGroup is inspected on its own.
# Applying -match to the whole collection of statuses would succeed as soon
# as a single group is replicating and thereby hide the unhealthy ones.
# The service is OK only when every group reports a status matching
# "ContinuouslyReplicating"; otherwise it is CRITICAL and lists the actual
# status of each affected group.
# NOTE(review): assumes the group objects expose a Name property - confirm.

$srGroups = @()
$queryError = $null
try {
    $srGroups = @(Get-SRGroup -ErrorAction Stop)
} catch {
    # A local check must print a single line, so collapse any line breaks.
    $queryError = ($_.Exception.Message -replace '\s+', ' ')
}

if ($queryError) {
    Write-Output "2 StorageReplicationStatus - CRITICAL: Get-SRGroup failed: $queryError"
} elseif ($srGroups.Count -eq 0) {
    Write-Output "2 StorageReplicationStatus - CRITICAL: No Storage Replica group found"
} else {
    $notReplicating = @($srGroups | Where-Object { "$($_.ReplicationStatus)" -notmatch "ContinuouslyReplicating" })

    if ($notReplicating.Count -eq 0) {
        Write-Output "0 StorageReplicationStatus - OK: Continuously Replicating"
    } else {
        $summary = ($notReplicating | ForEach-Object { "$($_.Name)=$($_.ReplicationStatus)" }) -join ", "
        Write-Output "2 StorageReplicationStatus - CRITICAL: Not continuously replicating: $summary"
    }
}
# Check the TLS certificate of one host and print a Checkmk local-check line.
#
# Globals (read): warn, crit (thresholds in remaining days), max_days
# Arguments:
#   $1 - host name; also sent as the SNI server name
#   $2 - optional port. Default: 443, or 8443 for
#        armtestrz05.rz.gc-gruppe.net
# Outputs:
#   One line: <state> SSL_CERT_<host> remaining_days=<n>;... <text>
# Returns:
#   0 (OK), 1 (WARNING) or 2 (CRITICAL), matching the printed state.
check_certificate() {
    local domain=$1
    local port=${2:-}
    local connect_timeout=2
    local expiry_date expiry_epoch now_epoch remaining_days
    local service="SSL_CERT_${domain}"

    if [ -z "$port" ]; then
        if [ "$domain" == "armtestrz05.rz.gc-gruppe.net" ]; then
            port=8443
        else
            port=443
        fi
    fi

    # stdin comes from /dev/null so that s_client exits right after the
    # handshake instead of waiting for input until the timeout kills it.
    expiry_date=$(timeout "$connect_timeout" openssl s_client \
            -connect "${domain}:${port}" -servername "$domain" </dev/null 2>/dev/null \
        | openssl x509 -noout -enddate 2>/dev/null \
        | cut -d= -f2)

    if [ -z "$expiry_date" ]; then
        echo "2 $service remaining_days=0;$warn;$crit;0;$max_days CRITICAL - Das Zertifikat für $domain konnte nicht abgerufen werden (Verbindungsfehler oder Timeout)."
        return 2
    fi

    # An end date that date(1) cannot parse must not turn into a bogus
    # day count.
    if ! expiry_epoch=$(date -d "$expiry_date" +%s 2>/dev/null) || [ -z "$expiry_epoch" ]; then
        echo "2 $service remaining_days=0;$warn;$crit;0;$max_days CRITICAL - Das Ablaufdatum des Zertifikats für $domain konnte nicht gelesen werden ($expiry_date)."
        return 2
    fi

    now_epoch=$(date +%s)
    remaining_days=$(( (expiry_epoch - now_epoch) / 86400 ))

    # The metrics live in the third column only; they are not repeated in
    # the summary text.
    local metrics="remaining_days=$remaining_days;$warn;$crit;0;$max_days"

    if [ "$expiry_epoch" -le "$now_epoch" ]; then
        echo "2 $service $metrics CRITICAL - Das Zertifikat für $domain ist seit $(( -remaining_days )) Tagen abgelaufen."
        return 2
    elif [ "$remaining_days" -le "$crit" ]; then
        echo "2 $service $metrics CRITICAL - Das Zertifikat für $domain läuft in $remaining_days Tagen ab."
        return 2
    elif [ "$remaining_days" -le "$warn" ]; then
        echo "1 $service $metrics WARNING - Das Zertifikat für $domain läuft in $remaining_days Tagen ab."
        return 1
    else
        echo "0 $service $metrics OK - Das Zertifikat für $domain läuft in $remaining_days Tagen ab."
        return 0
    fi
}