This Python script parses the output of the `safenode-manager status`
command, looks for nodes with 0 connections, and reboots them. You can leave it running permanently: once a pass is complete, it checks again every hour.
It can probably be improved but… it works lol.
import subprocess
import time
from datetime import datetime, timedelta
# Maps each node's service name to the datetime of its most recent reboot.
# Written by reboot_node() and read by should_reboot(), which uses it to
# avoid rebooting the same node more than once per hour.
last_reboot_time = {}
def get_safenode_status():
    """Run `safenode-manager status` and return its stdout as a list of lines.

    A non-zero exit code is reported on the console, together with anything
    the command wrote to stderr, instead of being silently ignored. Whatever
    the command wrote to stdout is still returned, so callers receive the
    same data they always did.

    Returns:
        list[str]: The lines of the command's standard output (an empty list
        if it printed nothing).
    """
    print("Fetching safenode status...")
    result = subprocess.run(
        ['safenode-manager', 'status'], capture_output=True, text=True
    )
    if result.returncode != 0:
        # Without this, a failed call is indistinguishable from a listing
        # in which no node needs attention.
        print(
            "Warning: `safenode-manager status` exited with code "
            f"{result.returncode}: {result.stderr.strip()}"
        )
    else:
        print("Status fetched.")
    return result.stdout.splitlines()
def parse_status_output(status_lines):
    """Collect the names of nodes whose reported connection count is 0.

    Each line is split on whitespace. The first field is taken as the node
    name and the last field as the connection count. Lines with fewer than
    four fields, or whose last field is not an integer, are skipped.

    Args:
        status_lines: Iterable of text lines from `safenode-manager status`.

    Returns:
        list[str]: Node names with a connection count of 0, in input order.
    """
    print("Parsing status output...")
    idle_nodes = []
    for raw_line in status_lines:
        print(f"Processing line: {raw_line}")
        fields = raw_line.split()
        if len(fields) >= 4:
            name, count_field = fields[0], fields[-1]
            try:
                peer_count = int(count_field)
            except ValueError:
                # Header rows and rows with a non-numeric count end up here.
                print(f"Skipping line due to parsing error: {raw_line}")
            else:
                print(f"Node: {name}, Connections: {peer_count}")
                if peer_count == 0:
                    print(f"Node {name} has 0 connections.")
                    idle_nodes.append(name)
        else:
            print(f"Skipping malformed line: {raw_line}")
    print("Finished parsing status output.")
    return idle_nodes
def _node_is_stopped(status_lines, node_name):
    """Return True if a status line for exactly `node_name` contains STOPPED."""
    for line in status_lines:
        fields = line.split()
        # Compare the whole first column rather than searching the line for
        # a substring: "safenode1" must not match the row for "safenode10".
        if fields and fields[0] == node_name and "STOPPED" in line:
            return True
    return False


def wait_for_stop(node_name):
    """Block until the status listing shows `node_name` as STOPPED.

    The status command is re-run every 5 seconds. There is no timeout: the
    function only returns once a row whose first column equals `node_name`
    contains the text "STOPPED".

    Args:
        node_name: Service name of the node, as it appears in the first
            column of the `safenode-manager status` output.
    """
    print(f"Waiting for {node_name} to fully stop...")
    while True:
        if _node_is_stopped(get_safenode_status(), node_name):
            print(f"{node_name} is now stopped.")
            return
        print(f"{node_name} is not yet stopped, checking again in 5 seconds...")
        time.sleep(5)  # Check every 5 seconds
def reboot_node(node_name):
    """Reboot a node: stop its service, wait until it is stopped, start it.

    The exit codes of the stop and start commands are checked. If the stop
    command fails, the reboot is abandoned before waiting, because
    wait_for_stop() polls without a timeout and would otherwise block the
    monitor indefinitely. If the start command fails, the failure is
    reported and the reboot time is not recorded, so the node stays
    eligible for another attempt.

    Args:
        node_name: Service name passed to `safenode-manager` via
            `--service-name`.
    """
    print(f"Rebooting {node_name}...")
    stop_command = ['safenode-manager', 'stop', '--service-name', node_name]
    start_command = ['safenode-manager', 'start', '--service-name', node_name]

    print(f"Stopping {node_name}...")
    stop_result = subprocess.run(stop_command)
    if stop_result.returncode != 0:
        # Do not wait for a stop that was never successfully requested.
        print(
            f"Failed to stop {node_name} "
            f"(exit code {stop_result.returncode}), aborting reboot."
        )
        return
    print(f"Stopped {node_name}, waiting for it to fully stop...")
    wait_for_stop(node_name)

    print(f"Starting {node_name}...")
    start_result = subprocess.run(start_command)
    if start_result.returncode != 0:
        print(
            f"Failed to start {node_name} "
            f"(exit code {start_result.returncode})."
        )
        return
    print(f"Started {node_name}")

    # Record the reboot only after a successful restart; should_reboot()
    # uses this timestamp to limit reboots to one per hour.
    last_reboot_time[node_name] = datetime.now()
    print(f"Updated last reboot time for {node_name} to {last_reboot_time[node_name]}.")
def should_reboot(node_name):
    """Decide whether `node_name` may be rebooted now.

    A node is eligible when it has no entry in `last_reboot_time`, or when
    more than one hour has passed since the recorded reboot.

    Args:
        node_name: Service name used as the key in `last_reboot_time`.

    Returns:
        bool: True if the node should be rebooted, False otherwise.
    """
    try:
        rebooted_at = last_reboot_time[node_name]
    except KeyError:
        print(f"{node_name} has not been rebooted before, should reboot.")
        return True

    elapsed = datetime.now() - rebooted_at
    if elapsed <= timedelta(hours=1):
        print(f"Less than an hour since {node_name} was last rebooted, skipping reboot.")
        return False

    print(f"More than an hour has passed since {node_name} was last rebooted, should reboot.")
    return True
def main():
    """Run one monitoring pass, then sleep for an hour.

    Fetches the current status, finds nodes reporting 0 connections, and
    reboots each one that should_reboot() allows. The function returns
    after the one-hour sleep; the caller is expected to invoke it again.
    """
    print("Starting safenode monitoring script...")
    idle_nodes = parse_status_output(get_safenode_status())

    for name in idle_nodes:
        if not should_reboot(name):
            print(f"Skipping reboot for {name} as it was recently rebooted.")
            continue
        reboot_node(name)

    print("Sleeping for one hour before next check...")
    time.sleep(3600)  # Sleep for one hour
if __name__ == "__main__":
    # main() performs a single check-and-reboot pass and then sleeps for an
    # hour, so calling it in an endless loop keeps the monitor running until
    # the process is interrupted.
    while True:
        main()