Skip to content

Commit 29385d1

Browse files
committed
Merge branch 'master' of https://github.com/lework/script
2 parents 6c11185 + c0ca2a1 commit 29385d1

File tree

1 file changed

+41
-11
lines changed

1 file changed

+41
-11
lines changed

python/supervisor_healthCheck.py

+41-11
Original file line number | Diff line number | Diff line change
@@ -117,8 +117,8 @@ def find_children(parent_pid, procs):
117117

118118
procs = []
119119
for line in data.splitlines():
120-
pid, ppid, rss = map(int, line.split())
121-
procs.append(ProcInfo(pid=pid, ppid=ppid, rss=rss))
120+
p_pid, p_ppid, p_rss = map(int, line.split())
121+
procs.append(ProcInfo(pid=p_pid, ppid=p_ppid, rss=p_rss))
122122

123123
# 计算rss
124124
try:
@@ -262,7 +262,6 @@ def check(self, config):
262262
initialDelaySeconds = config.get('initialDelaySeconds', self.initialDelaySeconds)
263263
sendResolved = config.get('sendResolved', self.sendResolved)
264264
action_type = config.get('action', 'restart')
265-
action_exec_cmd = config.get('execCmd')
266265

267266
check_type = config.get('type', 'HTTP').lower()
268267
check_method = self.http_check
@@ -324,10 +323,10 @@ def check(self, config):
324323
check_state[program]['failure'] != 0 and check_state[program]['failure'] % (
325324
(periodSeconds + initialDelaySeconds) * 2) == 0):
326325
action_param = {
326+
'config': config,
327327
'action_type': action_type,
328328
'check_status': check_status,
329-
'msg': check_result.get('msg', ''),
330-
'action_exec_cmd': action_exec_cmd
329+
'msg': check_result.get('msg', '')
331330
}
332331
self.action(program, **action_param)
333332
check_state[program]['action'] = True
@@ -485,25 +484,32 @@ def action(self, program, **args):
485484
"""
486485
action_type = args.get('action_type')
487486
msg = args.get('msg')
488-
action_exec_cmd = args.get('action_exec_cmd')
489487
check_status = args.get('check_status')
490-
488+
config = args.get('config')
489+
491490
self.log(program, 'Action: %s', action_type)
492491
action_list = action_type.split(',')
493492

494493
if 'restart' in action_list:
495-
restart_result = self.action_supervistor_restart(program)
494+
restart_result = self.action_supervisor_restart(program)
496495
msg += '\r\n Restart:%s' % restart_result
497496
elif 'exec' in action_list:
497+
action_exec_cmd = config.get('action_exec_cmd')
498498
exec_result = self.action_exec(program, action_exec_cmd)
499499
msg += '\r\n Exec:%s' % exec_result
500+
elif 'kill' in action_list:
501+
pid_get = config.get('pidGet', 'supervisor')
502+
pid_file = config.get('pidFile', )
503+
pid, err = self.get_pid(program, pid_get, pid_file)
504+
kill_result = self.action_kill(program, pid)
505+
msg += '\r\n Kill:%s' % kill_result
500506

501507
if 'email' in action_list and self.mail_config:
502508
self.action_email(program, action_type, msg, check_status)
503509
if 'wechat' in action_list and self.wechat_config:
504510
self.action_wechat(program, action_type, msg, check_status)
505511

506-
def action_supervistor_restart(self, program):
512+
def action_supervisor_restart(self, program):
507513
"""
508514
通过supervisor的rpc接口重启进程
509515
:param program:
@@ -563,6 +569,30 @@ def action_exec(self, program, cmd):
563569
self.log(program, "Action: exec result %s", result)
564570

565571
return result
572+
573+
def action_kill(self, program, pid):
574+
"""
575+
杀死进程
576+
:param program:
577+
:param pid:
578+
:return:
579+
"""
580+
self.log(program, 'Action: kill')
581+
result = 'success'
582+
583+
if int(pid) < 3:
584+
return 'Failed to kill %s, pid: %s '% (program, exitcode)
585+
586+
cmd = "kill -9 %s" % pid
587+
exitcode, stdout, stderr = shell(cmd)
588+
589+
if exitcode == 0:
590+
self.log(program, "Action: kill result success")
591+
else:
592+
result = 'Failed to kill %s, pid: %s exiting: %s' % (program, pid, exitcode)
593+
self.log(program, "Action: kill result %s", result)
594+
595+
return result
566596

567597
def action_email(self, program, action_type, msg, check_status):
568598
"""
@@ -786,7 +816,7 @@ def sig_handler(signum, frame):
786816
type: mem # 检查类型: http,tcp,mem,cpu 默认: http
787817
maxRss: 1024 # 内存阈值, 超过则为检测失败. 单位MB, 默认: 1024
788818
cumulative: True # 是否统计子进程的内存, 默认: False
789-
pidGet: supervistor # 获取pid的方式: supervistor,name,file, 选择name时,按program名称搜索pid,选择file时,需指定pidFile 默认: supervistor
819+
pidGet: supervisor # 获取pid的方式: supervisor,name,file, 选择name时,按program名称搜索pid,选择file时,需指定pidFile 默认: supervisor
790820
pidFile: /var/run/t.pid # 指定pid文件的路径, 只在pidGet为file的时候有用
791821
periodSeconds: 10 # 检查的频率(以秒为单位), 默认: 5
792822
initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1
@@ -800,7 +830,7 @@ def sig_handler(signum, frame):
800830
cat2: # supervisor中配置的program名称
801831
type: cpu # 检查类型: http,tcp,mem,cpu 默认: http
802832
maxCpu: 80 # CPU阈值, 超过则为检测失败. 单位% 默认: 90%
803-
pidGet: supervistor # 获取pid的方式: supervistor,name,file, 选择name时,按program名称搜索pid,选择file时,需指定pidFile 默认: supervistor
833+
pidGet: supervisor # 获取pid的方式: supervisor,name,file, 选择name时,按program名称搜索pid,选择file时,需指定pidFile 默认: supervisor
804834
pidFile: /var/run/t.pid # 指定pid文件的路径, 只在pidGet为file的时候有用
805835
periodSeconds: 10 # 检查的频率(以秒为单位), 默认: 5
806836
initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1

0 commit comments

Comments
 (0)