Bash scripts for managing safe nodes on Linux

This Python script parses the output of the `safenode-manager status` command, searches it for nodes with 0 connections, and reboots them. You can leave it running permanently: once a pass is complete, it checks again every hour.

It can probably be improved but… it works lol.

import subprocess
import time
from datetime import datetime, timedelta

# Per-node record of restarts: service name -> datetime of the most recent reboot
last_reboot_time = dict()

def get_safenode_status():
    """Run `safenode-manager status` and return its standard output split into lines.

    Only stdout is returned; stderr is captured and discarded, and the
    command's exit code is not inspected.
    """
    print("Fetching safenode status...")
    command = ['safenode-manager', 'status']
    completed = subprocess.run(command, capture_output=True, text=True)
    print("Status fetched.")
    output_lines = completed.stdout.splitlines()
    return output_lines

def _zero_connection_node(line):
    """Return the node name if *line* reports exactly 0 connections, else None.

    A usable line has at least four whitespace-separated fields; the first is
    taken as the node name and the last as the connection count.
    """
    print(f"Processing line: {line}")
    fields = line.split()
    if len(fields) < 4:
        print(f"Skipping malformed line: {line}")
        return None

    name = fields[0]
    try:
        peer_count = int(fields[-1])
    except ValueError:
        # Non-numeric last column (e.g. a header row): nothing to evaluate.
        print(f"Skipping line due to parsing error: {line}")
        return None

    print(f"Node: {name}, Connections: {peer_count}")
    if peer_count != 0:
        return None
    print(f"Node {name} has 0 connections.")
    return name


def parse_status_output(status_lines):
    """Scan `safenode-manager status` output lines and collect the nodes with 0 connections.

    Args:
        status_lines: iterable of text lines, as returned by get_safenode_status().

    Returns:
        List of node names (first column) whose last column parses to 0,
        in the order they appear in the input.
    """
    print("Parsing status output...")
    candidates = [_zero_connection_node(line) for line in status_lines]
    idle_nodes = [name for name in candidates if name is not None]
    print("Finished parsing status output.")
    return idle_nodes

def wait_for_stop(node_name, poll_interval=5):
    """Block until `safenode-manager status` reports the node as stopped.

    The status output is re-fetched via get_safenode_status() until a line
    whose first column equals ``node_name`` also contains "STOPPED".

    Args:
        node_name: service name of the node, as it appears in the first
            column of the status output.
        poll_interval: seconds to sleep between status checks (default 5).

    Returns:
        None, once the node is seen as stopped. There is no timeout: the
        call keeps polling for as long as the node is not reported STOPPED.
    """
    print(f"Waiting for {node_name} to fully stop...")
    while True:
        for line in get_safenode_status():
            fields = line.split()
            # Compare the whole first column rather than using a substring
            # test: "safenode1" is a substring of "safenode10", so a substring
            # match could return early because a *different* node is stopped.
            if fields and fields[0] == node_name and "STOPPED" in line:
                print(f"{node_name} is now stopped.")
                return
        print(f"{node_name} is not yet stopped, checking again in {poll_interval} seconds...")
        time.sleep(poll_interval)

def reboot_node(node_name):
    """Restart one node: stop its service, wait for the stop to take effect, start it again.

    After the start command has been issued, the current time is stored in
    ``last_reboot_time`` under ``node_name``. The exit codes of the stop and
    start commands are not checked.
    """
    print(f"Rebooting {node_name}...")
    service_args = ['--service-name', node_name]

    print(f"Stopping {node_name}...")
    subprocess.run(['safenode-manager', 'stop'] + service_args)
    print(f"Stopped {node_name}, waiting for it to fully stop...")
    wait_for_stop(node_name)

    print(f"Starting {node_name}...")
    subprocess.run(['safenode-manager', 'start'] + service_args)
    print(f"Started {node_name}")

    # Remember when this node was restarted so should_reboot() can rate-limit it.
    restarted_at = datetime.now()
    last_reboot_time[node_name] = restarted_at
    print(f"Updated last reboot time for {node_name} to {restarted_at}.")

def should_reboot(node_name):
    """Decide whether a node is due for a reboot.

    Returns True when the node has no entry in ``last_reboot_time`` or when
    its recorded reboot is more than one hour old; otherwise returns False.
    """
    try:
        rebooted_at = last_reboot_time[node_name]
    except KeyError:
        print(f"{node_name} has not been rebooted before, should reboot.")
        return True

    elapsed = datetime.now() - rebooted_at
    if elapsed <= timedelta(hours=1):
        print(f"Less than an hour since {node_name} was last rebooted, skipping reboot.")
        return False

    print(f"More than an hour has passed since {node_name} was last rebooted, should reboot.")
    return True

def main():
    """Run one monitoring pass, then sleep for an hour.

    A pass fetches the current status, picks out the nodes with 0 connections
    and reboots each one that should_reboot() approves.
    """
    print("Starting safenode monitoring script...")

    for node_name in parse_status_output(get_safenode_status()):
        if not should_reboot(node_name):
            print(f"Skipping reboot for {node_name} as it was recently rebooted.")
            continue
        reboot_node(node_name)

    print("Sleeping for one hour before next check...")
    one_hour = 3600  # seconds
    time.sleep(one_hour)

# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    # Repeat the monitoring pass forever; main() itself sleeps for an hour
    # at the end of each pass, so this loop does not spin.
    while True:
        main()

2 Likes