批量处理是运维必备的手段之一,实现方式多样,例如常用的 Ansible 等;本文介绍如何用 Python 实现该功能。
伪代码如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import paramiko
import time
import socket
import os
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Hosts to process. Each entry is a dict with the keys 'hostname',
# 'username' and 'password', e.g.:
#     {'hostname': 'xxx', 'username': 'xxx', 'password': 'xxx'},
# Add one entry per server.
servers = []

# Precondition command (or script) run on each host. The main loop reboots
# the host when this command writes nothing to stdout.
check_command1 = "XX"

# Command that triggers the reboot on the remote host.
reboot_command1 = "XX"

# Command run after the host is reachable again; non-empty stdout is treated
# as "service recovered".
service_check_command1 = "XXX"

# Seconds to sleep between polling rounds while waiting for a reboot.
polling_interval = 60
def ssh_login(hostname, username, password, max_retries=10, retry_interval=10):
    """Open an SSH session to ``hostname``, retrying recoverable failures.

    Authentication errors, SSH protocol errors and network-level errors
    (``socket.error``: refused connection, timeout, unreachable host, ...)
    are retried up to ``max_retries`` times. Network errors must be retried
    because a freshly rebooted host usually answers ping before sshd accepts
    connections. Any other exception is treated as non-recoverable.

    Args:
        hostname: Host name or address of the SSH server.
        username: Login user.
        password: Login password.
        max_retries: Maximum number of connection attempts.
        retry_interval: Seconds to sleep between two attempts.

    Returns:
        A connected ``paramiko.SSHClient`` on success, otherwise ``None``.
        The caller owns the returned client and must close it.
    """
    for attempt in range(1, max_retries + 1):
        ssh_client = paramiko.SSHClient()
        # NOTE(review): AutoAddPolicy accepts unknown host keys without
        # verification; acceptable only on a trusted network.
        ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        try:
            ssh_client.connect(hostname, username=username, password=password)
        except paramiko.AuthenticationException:
            # Must precede SSHException, of which it is a subclass.
            reason = "Authentication failed."
        except paramiko.SSHException:
            reason = "SSH connection failed."
        except socket.error as e:
            # Also expected to cover paramiko's NoValidConnectionsError,
            # which is documented as a socket.error subclass.
            reason = "Network error ({}).".format(e)
        except Exception as e:
            # Unknown failure: do not retry, and do not report it as
            # "maximum retries exceeded".
            print("Error: {}".format(str(e)))
            ssh_client.close()
            return None
        else:
            print("SSH login successful!")
            return ssh_client

        # Release the failed client before the next attempt.
        ssh_client.close()
        if attempt < max_retries:
            print("{} Retrying in {} seconds...".format(reason, retry_interval))
            time.sleep(retry_interval)
        else:
            # No point sleeping after the final attempt.
            print(reason)

    print("Exceeded maximum retries. Failed to login to SSH server.")
    return None
# Walk the server list. For each host: log in, run the precondition check,
# reboot when the check prints nothing, then wait for the host to come back
# and run the service check.

# Upper bound on post-reboot polling rounds per host, so that a machine that
# never comes back cannot block the remaining servers forever.
max_reboot_polls = 30

for server in servers:
    host = server['hostname']
    # Reset per host so the cleanup below never touches a stale or missing
    # client (login may fail or raise before a client exists).
    ssh_client = None
    try:
        ssh_client = ssh_login(host, username=server['username'], password=server['password'])
        if not ssh_client:
            # ssh_login already reported the failure.
            continue

        # Precondition check: empty stdout means the host must be rebooted.
        _, stdout, _ = ssh_client.exec_command(check_command1)
        condition_met1 = stdout.read()
        if condition_met1:
            continue

        print("条件满足,正在重启 %s" % host)
        # A pty is requested for the reboot command, as in the original call.
        ssh_client.exec_command(reboot_command1, get_pty=True)
        print("%s 已开始重启..." % host)
        time.sleep(polling_interval)
        # The old session is useless once the host goes down.
        ssh_client.close()
        ssh_client = None

        # Wait for the host to return, then verify the service.
        for _ in range(max_reboot_polls):
            # NOTE: hostname comes from the local `servers` config and is
            # passed to the shell unescaped.
            response = os.system("ping -c 10 " + host)
            if response != 0:
                print("Server is still rebooting...")
                time.sleep(polling_interval)
                continue

            print("Server is up!")
            ssh_client = ssh_login(host, username=server['username'], password=server['password'])
            if not ssh_client:
                # Host answers ping but SSH is not usable yet; wait a round.
                time.sleep(polling_interval)
                continue

            _, stdout, _ = ssh_client.exec_command(service_check_command1)
            service_status1 = stdout.read()
            print(service_status1)
            if service_status1:
                print("%s 重启完成,服务已恢复。" % host)
                break

            # Service not reporting yet: drop this session and poll again.
            ssh_client.close()
            ssh_client = None
            time.sleep(polling_interval)
        else:
            print("%s 重启后在限定轮询次数内未恢复。" % host)
    except Exception as e:
        # Top-level boundary: report and move on to the next server.
        print("连接 %s 出错:%s" % (host, e))
    finally:
        # Close whatever session is still open for this host.
        if ssh_client is not None:
            ssh_client.close()
        print("完成操作 %s,继续下一台服务器。" % host)