add content to tsm_backup_check.py
parent e5962af62d
commit 91522703ad

TSM/tms_backup_check.py (new file, 503 lines)

@@ -0,0 +1,503 @@
#!/usr/bin/env python3
"""
TSM Backup Status Check Plugin for CheckMK 2.3+
- Parses tsm_backups agent section
- Creates services with labels (backup_type, frequency, level, error_handling)
- Dynamic backup type detection
- Configurable type-specific thresholds
- Tolerant error handling for FILE/VIRTUAL backups

Installation: /omd/sites/monitoring/local/lib/python3/cmk_addons/plugins/tsm/agent_based/tsm_backups.py
Check name: tsm_backups

Author: Marius Gielnik
Version: 4.1 - Fixed Label Detection & Comprehensive Documentation
"""
from typing import Any, Mapping
from datetime import datetime
import json
import re

from cmk.agent_based.v2 import (
    AgentSection,
    CheckPlugin,
    CheckResult,
    DiscoveryResult,
    Result,
    Service,
    ServiceLabel,
    State,
    Metric,
    render,
    StringTable,
)

# ============================================================================
# Configuration Section
# ============================================================================

# Type-specific thresholds (in seconds)
# New types can be added here dynamically without code changes
THRESHOLDS = {
    "log": {"warn": 4 * 3600, "crit": 8 * 3600},        # Hourly LOG backups
    "mssql": {"warn": 26 * 3600, "crit": 48 * 3600},    # Daily DB backups
    "hana": {"warn": 26 * 3600, "crit": 48 * 3600},
    "db2": {"warn": 26 * 3600, "crit": 48 * 3600},
    "oracle": {"warn": 26 * 3600, "crit": 48 * 3600},
    "mysql": {"warn": 26 * 3600, "crit": 48 * 3600},
    "file": {"warn": 36 * 3600, "crit": 72 * 3600},     # More tolerant
    "virtual": {"warn": 36 * 3600, "crit": 72 * 3600},
    "mail": {"warn": 26 * 3600, "crit": 48 * 3600},
    "scale": {"warn": 36 * 3600, "crit": 72 * 3600},
    "dm": {"warn": 36 * 3600, "crit": 72 * 3600},
    "datacenter": {"warn": 36 * 3600, "crit": 72 * 3600},
    "default": {"warn": 26 * 3600, "crit": 48 * 3600},  # Fallback
}
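
# Hypothetical example of extending the table: a new daily database type could
# be added with an entry such as
#   "postgres": {"warn": 26 * 3600, "crit": 48 * 3600},
# (for it to be detected it must also appear in known_types inside
# extract_backup_type below).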

# Types that should use tolerant error handling
# Failed/Missed backups result in WARNING instead of CRITICAL
TOLERANT_TYPES = {
    'file', 'virtual', 'scale', 'dm', 'datacenter', 'mail'
}

# Known database types (for better classification)
DATABASE_TYPES = {
    'mssql', 'hana', 'db2', 'oracle', 'mysql'
}

# Known virtualization types
VIRTUAL_TYPES = {
    'virtual'
}


# ============================================================================
# Helper Functions
# ============================================================================

def extract_backup_type(node: str) -> str:
    """
    Extracts the backup type from the node name.

    Known types are detected; everything else is marked as "unknown".

    Examples:
        MYSERVER_MSSQL -> mssql
        DATABASE_HANA_01 -> hana
        FILESERVER_FILE -> file
        VM_VIRTUAL_123 -> virtual

    Args:
        node: Node name from TSM

    Returns:
        str: Detected backup type in lowercase
    """
    if '_' not in node:
        return "unknown"

    parts = node.split('_')
    last = parts[-1].upper()

    # Known backup types (fixed list)
    known_types = [
        'MSSQL', 'HANA', 'FILE', 'ORACLE', 'DB2', 'SCALE', 'DM',
        'DATACENTER', 'VIRTUAL', 'MAIL', 'MYSQL'
    ]

    if last in known_types:
        return last.lower()

    # If the last segment is numeric, try the second-to-last one
    if last.isdigit() and len(parts) > 1:
        second_last = parts[-2].upper()
        if second_last in known_types:
            return second_last.lower()

    return "unknown"

def extract_backup_level(schedules: list) -> str:
    """
    Extracts backup level from schedule names.

    Priority: log > full > differential > incremental

    Args:
        schedules: List of schedule names

    Returns:
        str: Detected backup level
    """
    levels_found = set()

    for schedule in schedules:
        schedule_upper = schedule.upper()

        if '_LOG' in schedule_upper or 'LOG' in schedule_upper:
            levels_found.add('log')
        elif '_FULL' in schedule_upper or 'FULL' in schedule_upper:
            levels_found.add('full')
        elif '_INCR' in schedule_upper or 'INCREMENTAL' in schedule_upper:
            levels_found.add('incremental')
        elif '_DIFF' in schedule_upper or 'DIFFERENTIAL' in schedule_upper:
            levels_found.add('differential')

    # Return in priority order
    if 'log' in levels_found:
        return 'log'
    if 'full' in levels_found:
        return 'full'
    if 'differential' in levels_found:
        return 'differential'
    if 'incremental' in levels_found:
        return 'incremental'

    return 'full'  # Default

def extract_frequency(schedules: list) -> str:
    """
    Extracts backup frequency from schedule names.

    Priority: hourly > daily > weekly > monthly

    Args:
        schedules: List of schedule names

    Returns:
        str: Detected frequency
    """
    frequencies_found = set()

    for schedule in schedules:
        schedule_upper = schedule.upper()

        if 'HOURLY' in schedule_upper:
            frequencies_found.add('hourly')
        elif 'DAILY' in schedule_upper:
            frequencies_found.add('daily')
        elif 'WEEKLY' in schedule_upper:
            frequencies_found.add('weekly')
        elif 'MONTHLY' in schedule_upper:
            frequencies_found.add('monthly')
        # Time-based pattern detection (HH-MM-SS_)
        elif re.match(r'^\d{2}-\d{2}-\d{2}_', schedule):
            if '_LOG' in schedule_upper:
                frequencies_found.add('hourly')
            elif schedule.startswith('00-00-00'):
                frequencies_found.add('daily')
            else:
                frequencies_found.add('daily')

    # Return in priority order
    if 'hourly' in frequencies_found:
        return 'hourly'
    if 'daily' in frequencies_found:
        return 'daily'
    if 'weekly' in frequencies_found:
        return 'weekly'
    if 'monthly' in frequencies_found:
        return 'monthly'

    return 'unknown'

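# Illustrative example (hypothetical schedule names): for
# schedules = ["00-00-00_DAILY_FULL", "HOURLY_LOG"] the two helpers above
# return level "log" (log outranks full) and frequency "hourly"
# (hourly outranks daily).

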
def get_error_handling(backup_type: str) -> str:
    """
    Determines error handling strategy based on backup type.

    Tolerant types: Failed backups trigger WARNING instead of CRITICAL
    Strict types: Failed backups trigger CRITICAL

    Args:
        backup_type: Detected backup type

    Returns:
        str: 'tolerant' or 'strict'
    """
    return 'tolerant' if backup_type in TOLERANT_TYPES else 'strict'


def get_backup_category(backup_type: str) -> str:
    """
    Categorizes backup type for additional labeling.

    Categories: database, virtualization, filesystem, application, other

    Args:
        backup_type: Detected backup type

    Returns:
        str: Category name
    """
    if backup_type in DATABASE_TYPES:
        return 'database'
    elif backup_type in VIRTUAL_TYPES:
        return 'virtualization'
    elif backup_type in {'file', 'scale', 'dm', 'datacenter'}:
        return 'filesystem'
    elif backup_type in {'mail', 'exchange'}:
        return 'application'
    else:
        return 'other'


def get_thresholds(backup_type: str, backup_level: str) -> dict:
    """
    Returns type and level-specific thresholds.

    Priority:
    1. If level is 'log', use log thresholds
    2. If type has specific thresholds, use those
    3. Use default thresholds

    Args:
        backup_type: Detected backup type
        backup_level: Detected backup level

    Returns:
        dict: {"warn": seconds, "crit": seconds}
    """
    if backup_level == 'log':
        return THRESHOLDS['log']
    elif backup_type in THRESHOLDS:
        return THRESHOLDS[backup_type]
    else:
        return THRESHOLDS['default']

def calculate_state(statuses: list, last_time: int, backup_type: str,
                    error_handling: str) -> tuple:
    """
    Calculates CheckMK state from backup statuses.

    Logic:
    - At least 1x Completed -> OK
    - Only Pending/Started (recent) -> OK
    - Only Pending/Started (old) -> WARN
    - Failed/Missed + tolerant -> WARN
    - Failed/Missed + strict -> CRIT

    Args:
        statuses: List of backup statuses
        last_time: Timestamp of last backup
        backup_type: Detected backup type
        error_handling: 'tolerant' or 'strict'

    Returns:
        tuple: (State, status_text)
    """
    statuses_lower = [s.lower() for s in statuses]

    # At least one completed backup -> OK
    if "completed" in statuses_lower:
        return (State.OK, "Completed")

    # Only Pending/Started
    only_pending_started = all(s in ["pending", "started"] for s in statuses_lower)
    if only_pending_started:
        if last_time:
            age = int(datetime.now().timestamp()) - last_time
            if age < 2 * 3600:  # 2 hours
                return (State.OK, "Pending/Started")
            else:
                return (State.WARN, "Pending (>2h)")
        else:
            return (State.WARN, "Pending")

    # Failed or Missed backups
    has_failed = any("failed" in s for s in statuses_lower)
    has_missed = "missed" in statuses_lower

    if has_failed or has_missed:
        if error_handling == 'tolerant':
            return (State.WARN, "Failed (partial)")
        else:
            return (State.CRIT, "Failed/Missed")

    return (State.CRIT, "Unknown State")

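# Illustrative behaviour of calculate_state (hypothetical statuses): statuses
# ["Completed", "Failed 12"] yield OK because a single completed run wins;
# statuses of only ["Failed 12"] yield WARN "Failed (partial)" for a tolerant
# type (e.g. FILE) and CRIT "Failed/Missed" for a strict type (e.g. MSSQL).

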
# ============================================================================
# Agent Section Registration
# ============================================================================

def parse_tsm_backups(string_table: StringTable) -> Mapping[str, Any]:
    """
    Parses the tsm_backups agent section.

    Expected format: JSON string from agent plugin

    Args:
        string_table: Raw agent output

    Returns:
        dict: Parsed backup data per node
    """
    if not string_table or not string_table[0]:
        return {}

    try:
        json_str = string_table[0][0]
        return json.loads(json_str)
    except (json.JSONDecodeError, IndexError, KeyError):
        return {}

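# Assumed agent payload (illustrative values; the field names are exactly the
# keys read by the discovery and check functions below):
# <<<tsm_backups>>>
# {"MYSERVER_MSSQL": {"schedules": ["00-00-00_DAILY_FULL"],
#                     "statuses": ["Completed"],
#                     "last": 1714000000,
#                     "count": 1}}

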
agent_section_tsm_backups = AgentSection(
    name="tsm_backups",
    parse_function=parse_tsm_backups,
)


# ============================================================================
# Check Plugin Registration
# ============================================================================

def discover_tsm_backups(section: Mapping[str, Any]) -> DiscoveryResult:
    """
    Discovery function - creates one service per logical node.

    Services are created with dynamic labels:
    - backup_type: Dynamically detected from node name
    - backup_category: database/virtualization/filesystem/application/other
    - backup_system: Always "tsm"
    - frequency: hourly/daily/weekly/monthly/unknown
    - backup_level: log/full/differential/incremental
    - error_handling: tolerant/strict
    - node_name: Original node name

    Args:
        section: Parsed agent data

    Yields:
        Service: CheckMK service objects with labels
    """
    for node in section:
        data = section[node]

        # Extract metadata dynamically
        backup_type = extract_backup_type(node)
        backup_level = extract_backup_level(data["schedules"])
        frequency = extract_frequency(data["schedules"])
        error_handling = get_error_handling(backup_type)
        category = get_backup_category(backup_type)

        # Create service with dynamic labels
        yield Service(
            item=node,
            labels=[
                ServiceLabel("backup_type", backup_type),
                ServiceLabel("backup_category", category),
                ServiceLabel("backup_system", "tsm"),
                ServiceLabel("frequency", frequency),
                ServiceLabel("backup_level", backup_level),
                ServiceLabel("error_handling", error_handling),
                ServiceLabel("node_name", node),
            ]
        )


def check_tsm_backups(item: str, section: Mapping[str, Any]) -> CheckResult:
    """
    Check function - evaluates backup status and generates metrics.

    Checks:
    - Backup completion status
    - Backup age against type-specific thresholds
    - Number of backup jobs

    Args:
        item: Node name (service identifier)
        section: Parsed agent data

    Yields:
        Result: Check results with state
        Metric: Performance metrics
    """
    if item not in section:
        yield Result(
            state=State.UNKNOWN,
            summary=f"Backup {item} not found in data"
        )
        return

    data = section[item]

    # Extract metadata dynamically
    backup_type = extract_backup_type(item)
    backup_level = extract_backup_level(data["schedules"])
    frequency = extract_frequency(data["schedules"])
    error_handling = get_error_handling(backup_type)
    category = get_backup_category(backup_type)

    # Calculate state
    state, status_text = calculate_state(
        data["statuses"],
        data["last"],
        backup_type,
        error_handling
    )

    # Calculate backup age
    if data["last"]:
        age_seconds = int(datetime.now().timestamp()) - data["last"]
        age_txt = render.timespan(age_seconds)
    else:
        age_seconds = 999999
        age_txt = "unknown"

    # Get type-specific thresholds
    thresholds = get_thresholds(backup_type, backup_level)
    warn_seconds = thresholds["warn"]
    crit_seconds = thresholds["crit"]

    # Check age against thresholds (only if backup completed)
    if state == State.OK and age_seconds > crit_seconds:
        state = State.CRIT
    elif state == State.OK and age_seconds > warn_seconds:
        state = State.WARN

    # Build summary
    summary = (
        f"Type={backup_type.upper()} ({category}), "
        f"Level={backup_level.upper()}, "
        f"Freq={frequency}, "
        f"Status={status_text}, "
        f"Last={age_txt}, "
        f"Jobs={data['count']}"
    )

    # Main result
    yield Result(state=state, summary=summary)

    # Detailed information
    if state in [State.WARN, State.CRIT]:
        details = (
            f"Thresholds: WARN={render.timespan(warn_seconds)}, "
            f"CRIT={render.timespan(crit_seconds)}"
        )
        yield Result(state=State.OK, notice=details)

    # Metrics
    yield Metric(
        name="backup_age",
        value=age_seconds,
        levels=(warn_seconds, crit_seconds),
        boundaries=(0, None),
    )

    yield Metric(
        name="backup_jobs",
        value=data["count"],
    )


check_plugin_tsm_backups = CheckPlugin(
    name="tsm_backups",
    service_name="TSM Backup %s",
    discovery_function=discover_tsm_backups,
    check_function=check_tsm_backups,
    sections=["tsm_backups"],
)
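

# ----------------------------------------------------------------------------
# Manual test sketch (assumed workflow; host name and site are placeholders,
# adjust to your environment and CheckMK version):
#   omd su monitoring
#   cmk -vI MYHOST     # rediscover services; expect "TSM Backup <NODE>" items
#   cmk -v MYHOST      # run the checks and show the summary built above
# ----------------------------------------------------------------------------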