21
21
import platform
22
22
import threading
23
23
import subprocess
24
+ import hmac
25
+ from hashlib import sha256
24
26
from email .header import Header
25
27
from email .mime .text import MIMEText
26
28
from collections import namedtuple
@@ -69,15 +71,16 @@ def shell(cmd):
69
71
proc .wait ()
70
72
return (proc .returncode ,) + proc .communicate ()
71
73
74
+
72
75
def drop_cache():
    """
    Clear the kernel cache.

    Meaning of the value written to /proc/sys/vm/drop_caches:
    1: pagecache, 2: dentries and inodes, 3: 1+2. This function writes 1
    after running ``sync``.

    :return: exit code of the shell command
    """
    # shell() returns (returncode, stdout, stderr); only the exit code is used.
    status, _stdout, _stderr = shell("sync && echo 1 > /proc/sys/vm/drop_caches")
    return status
80
82
83
+
81
84
def get_proc_cpu (pid ):
82
85
"""
83
86
获取进程CPU使用率
@@ -98,6 +101,7 @@ def get_proc_cpu(pid):
98
101
return None
99
102
return cpu_utilization
100
103
104
+
101
105
def get_proc_mem (pid , type = "rss" ):
102
106
"""
103
107
获取进程内存使用
@@ -170,12 +174,14 @@ def __init__(self, config):
170
174
self .mail_config = None
171
175
self .wechat_config = None
172
176
self .dingding_config = None
177
+ self .feishu_config = None
173
178
self .supervisord_url = 'unix:///var/run/supervisor.sock'
174
179
175
180
if 'config' in config :
176
181
self .mail_config = config ['config' ].get ('mail' )
177
182
self .wechat_config = config ['config' ].get ('wechat' )
178
183
self .dingding_config = config ['config' ].get ('dingding' )
184
+ self .feishu_config = config ['config' ].get ('feishu' )
179
185
self .supervisord_url = config ['config' ].get ('supervisordUrl' , self .supervisord_url )
180
186
self .supervisord_user = config ['config' ].get ('supervisordUser' , None )
181
187
self .supervisord_pass = config ['config' ].get ('supervisordPass' , None )
@@ -184,7 +190,7 @@ def __init__(self, config):
184
190
self .program_config = config
185
191
186
192
# 只保留通知action
187
- self .notice_action = ['email' , 'wechat' , 'dingding' ]
193
+ self .notice_action = ['email' , 'wechat' , 'dingding' , 'feishu' ]
188
194
189
195
self .periodSeconds = 5
190
196
self .failureThreshold = 3
@@ -516,24 +522,26 @@ def action(self, program, **args):
516
522
517
523
if 'restart' in action_list :
518
524
restart_result = self .action_supervisor_restart (program )
519
- msg += '\r \n Restart:%s' % restart_result
525
+ msg += '\r \n ** Restart** :%s' % restart_result
520
526
elif 'exec' in action_list :
521
527
action_exec_cmd = config .get ('action_exec_cmd' )
522
528
exec_result = self .action_exec (program , action_exec_cmd )
523
- msg += '\r \n Exec:%s' % exec_result
529
+ msg += '\r \n ** Exec** :%s' % exec_result
524
530
elif 'kill' in action_list :
525
531
pid_get = config .get ('pidGet' , 'supervisor' )
526
532
pid_file = config .get ('pidFile' , )
527
533
pid , err = self .get_pid (program , pid_get , pid_file )
528
534
kill_result = self .action_kill (program , pid )
529
- msg += '\r \n Kill:%s' % kill_result
535
+ msg += '\r \n ** Kill** :%s' % kill_result
530
536
531
537
if 'email' in action_list and self .mail_config :
532
538
self .action_email (program , action_type , msg , check_status )
533
539
if 'wechat' in action_list and self .wechat_config :
534
540
self .action_wechat (program , action_type , msg , check_status )
535
541
if 'dingding' in action_list and self .dingding_config :
536
542
self .action_dingding (program , action_type , msg , check_status )
543
+ if 'feishu' in action_list and self .feishu_config :
544
+ self .action_feishu (program , action_type , msg , check_status )
537
545
538
546
def action_supervisor_restart (self , program ):
539
547
"""
@@ -710,6 +718,7 @@ def action_wechat(self, program, action_type, msg, check_status):
710
718
}
711
719
712
720
access_token_url = '/cgi-bin/gettoken?corpid={id}&corpsecret={crt}' .format (id = corpid , crt = secret )
721
+
713
722
try :
714
723
httpClient = httplib .HTTPSConnection (host , timeout = 10 )
715
724
httpClient .request ("GET" , access_token_url , headers = headers )
@@ -806,13 +815,14 @@ def action_dingding(self, program, action_type, msg, check_status):
806
815
else :
807
816
title = "[%s] Health check failed" % program
808
817
809
- data = {"msgtype" : "markdown" ,
810
- "markdown" : {
811
- "title" : title ,
812
- "text" : "#### 详情信息: \n > Program:%s \n \n > DataTime: %s \n \n > Hostname: %s \n \n > Platfrom: %s \n \n > Msg:%s" % (
813
- program , curr_dt , hostname , system_platform , msg )
814
- }
815
- }
818
+ data = {
819
+ "msgtype" : "markdown" ,
820
+ "markdown" : {
821
+ "title" : title ,
822
+ "text" : "#### 详情信息: \n > Program:%s \n \n > DataTime: %s \n \n > Hostname: %s \n \n > Platfrom: %s \n \n > Msg:%s" % (
823
+ program , curr_dt , hostname , system_platform , msg )
824
+ }
825
+ }
816
826
817
827
try :
818
828
httpClient = httplib .HTTPSConnection (host , timeout = 10 )
@@ -832,6 +842,125 @@ def action_dingding(self, program, action_type, msg, check_status):
832
842
self .log (program , '[Action: dingding] send success' )
833
843
return True
834
844
845
def action_feishu(self, program, action_type, msg, check_status):
    """
    Send a health-check notification card to a Feishu custom-bot webhook.

    Reads ``webhook`` (the hook token appended to the bot URL) and the
    optional ``secret`` (signature key) from ``self.feishu_config``.

    :param program: program name shown in the card title and body
    :param action_type: action name shown in the card body
    :param msg: detail message shown in the card body
    :param check_status: 'success' selects a green "successful" header,
                         any other value selects a red "failed" header
    :return: True when the webhook reports StatusCode 0, otherwise False
    """
    host = "open.feishu.cn"

    secret = self.feishu_config.get('secret')
    webhook = self.feishu_config.get('webhook')
    if not webhook:
        # Without a hook token the URL would end in "/hook/None".
        self.log(program, '[Action: feishu] send error: webhook is not configured')
        return False

    headers = {
        'Content-Type': 'application/json'
    }
    send_url = "/open-apis/bot/v2/hook/{webhook}".format(webhook=webhook)

    # Connect a UDP socket and read back its local address to report as IP.
    ip = ""
    s = None
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(('8.8.8.8', 80))
        ip = s.getsockname()[0]
    except Exception as e:
        self.log(program, '[Action: feishu] get ip error %s' % e)
    finally:
        if s is not None:
            s.close()

    hostname = platform.node().split('.')[0]
    system_platform = platform.platform()

    curr_dt = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    if check_status == 'success':
        title = "[Supervisor] %s Health check successful" % program
        title_color = "green"
    else:
        title = "[Supervisor] %s Health check failed" % program
        title_color = "red"

    content = "**DataTime**: {curr_dt}\n **Program**: {program}\n **IP**: {ip}\n **Hostname**: {hostname}\n **Platfrom**: {platfrom}\n **Action**: {action}\n **Msg**: {msg}".format(
        curr_dt=curr_dt, program=program, ip=ip, hostname=hostname,
        platfrom=system_platform, action=action_type, msg=msg)

    data = {
        "msg_type": "interactive",
        "card": {
            "config": {
                "wide_screen_mode": True,
                "enable_forward": True
            },
            "header": {
                "title": {
                    "content": title,
                    "tag": "plain_text"
                },
                "template": title_color
            },
            "elements": [{
                "tag": "div",
                "text": {
                    "content": "详细信息:",
                    "tag": "lark_md"
                },
                "fields": [
                    {
                        "is_short": False,
                        "text": {
                            "tag": "lark_md",
                            "content": content
                        }
                    }]
            }]
        }
    }

    # Sign only when a secret is actually configured (None and "" both mean
    # "no signature"); previously a missing secret was signed as "None".
    if secret:
        # The timestamp is sent as a string on both Python 2 and Python 3.
        timestamp = str(int(round(time.time())))
        # The HMAC key is "<timestamp>\n<secret>" with no extra whitespace;
        # the signed message itself is empty.
        key = '%s\n%s' % (timestamp, secret)
        if not isinstance(key, bytes):
            key = key.encode('utf-8')
        hmac_code = hmac.new(key, b"", digestmod=sha256).digest()
        data['timestamp'] = timestamp
        data['sign'] = base64.b64encode(hmac_code).decode('utf-8')

    # Pre-bind so the error log below never hits an unbound name when the
    # request fails before a response has been parsed.
    result = None
    httpClient = httplib.HTTPSConnection(host, timeout=10)
    try:
        httpClient.request("POST", send_url, json.dumps(data), headers=headers)
        response = httpClient.getresponse()
        # Decode explicitly: json.loads() rejects bytes on Python 3.5.
        result = json.loads(response.read().decode('utf-8'))
        if result.get('StatusCode', 1) != 0:
            self.log(program, '[Action: feishu] send faild %s' % result)
            return False
    except Exception as e:
        self.log(program, '[Action: feishu] send error [%s] %s' % (result, e))
        return False
    finally:
        httpClient.close()

    self.log(program, '[Action: feishu] send success')
    return True
963
+
835
964
def start (self ):
836
965
"""
837
966
启动检测
@@ -900,7 +1029,10 @@ def sig_handler(signum, frame):
900
1029
# totag:
901
1030
# dingding: # 钉钉通知配置
902
1031
access_token:
903
-
1032
+ # feishu: # 飞书通知配置
1033
+ webhook:
1034
+ secret:
1035
+
904
1036
# 内存方式监控
905
1037
cat1: # supervisor中配置的program名称
906
1038
type: mem # 检查类型: http,tcp,mem,cpu 默认: http
@@ -912,9 +1044,9 @@ def sig_handler(signum, frame):
912
1044
initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1
913
1045
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
914
1046
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
915
- action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥 ,同时设置时restart生效) 默认: restart
1047
+ action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥 ,同时设置时restart生效) 默认: restart
916
1048
execCmd: command # action exec 的执行命令
917
- sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False
1049
+ sendResolved: True # 是否发送恢复通知 默认: False
918
1050
919
1051
# cpu方式监控
920
1052
cat2: # supervisor中配置的program名称
@@ -926,9 +1058,9 @@ def sig_handler(signum, frame):
926
1058
initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1
927
1059
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
928
1060
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
929
- action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥 ,同时设置时restart生效) 默认: restart
1061
+ action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥 ,同时设置时restart生效) 默认: restart
930
1062
execCmd: command # action exec 的执行命令
931
- sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False
1063
+ sendResolved: True # 是否发送恢复通知 默认: False
932
1064
933
1065
# HTTP方式监控
934
1066
cat3:
@@ -946,9 +1078,9 @@ def sig_handler(signum, frame):
946
1078
timeoutSeconds: 5 # 检查超时的秒数, 默认: 3
947
1079
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
948
1080
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
949
- action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥 ,同时设置时restart生效) 默认: restart
1081
+ action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥 ,同时设置时restart生效) 默认: restart
950
1082
execCmd: command # action exec 的执行命令
951
- sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False
1083
+ sendResolved: True # 是否发送恢复通知 默认: False
952
1084
953
1085
# TCP方式监控
954
1086
cat4:
@@ -960,9 +1092,9 @@ def sig_handler(signum, frame):
960
1092
timeoutSeconds: 5 # 检查超时的秒数, 默认: 3
961
1093
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
962
1094
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
963
- action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥 ,同时设置时restart生效) 默认: restart
1095
+ action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥 ,同时设置时restart生效) 默认: restart
964
1096
execCmd: command # action exec 的执行命令
965
- sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False
1097
+ sendResolved: True # 是否发送恢复通知 默认: False
966
1098
"""
967
1099
with open (config_file , 'w' ) as f :
968
1100
f .write (example_config )
0 commit comments