-
Notifications
You must be signed in to change notification settings - Fork 5.6k
/
Copy pathsighandler.py
192 lines (155 loc) · 7.07 KB
/
sighandler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""Utility to support asynchronously signaling the current process."""
import atexit
import os
import signal
import sys
import threading
import time
import traceback
import psutil
from buildscripts.resmokelib import config, parser, reportfile, testing
from buildscripts.resmokelib.flags import HANG_ANALYZER_CALLED
_IS_WINDOWS = sys.platform == "win32"
if _IS_WINDOWS:
import win32api
import win32event
def register(logger, suites, start_time):
    """Install the trigger that makes this process dump its stacks and run the hang analyzer.

    On native Windows a named event object is created and a daemon thread waits on it. On
    every other platform (Cygwin included, since it supports POSIX signals) a SIGUSR1 handler
    is installed instead.

    Args:
        logger: logger that receives the stack dump, the suite summaries and any errors.
        suites: suites handed to ``reportfile.write`` and ``Suite.log_summaries``.
        start_time: ``time.time()``-style timestamp; the elapsed time reported in the
            summaries is measured from it.
    """

    def _dump_and_log(header_msg):
        """Dump the stacks of all threads, write report file, and log suite summaries."""
        _dump_stacks(logger, header_msg)
        reportfile.write(suites)
        testing.suite.Suite.log_summaries(logger, suites, time.time() - start_time)

        if "is_inner_level" not in config.INTERNAL_PARAMS:
            # Gather and analyze pids of all subprocesses.
            # Do nothing for child resmoke process started by another resmoke process
            # (e.g. backup_restore.js) The child processes of the child resmoke will be
            # analyzed by the signal handler of the top-level resmoke process.
            # i.e. the next few lines of code.
            _analyze_pids(logger, _get_pids())

    def _handle_sigusr1(signum, frame):  # pylint: disable=unused-argument
        """Signal handler for SIGUSR1.

        The handler will dump the stacks of all threads and write out the report file and
        log suite summaries.
        """
        HANG_ANALYZER_CALLED.set()
        _dump_and_log("Dumping stacks due to SIGUSR1 signal")

    def _handle_set_event(event_handle):
        """Event object handler for Windows.

        Blocks on ``event_handle`` forever; each time the event is signalled the handler
        dumps the stacks of all threads, writes out the report file and logs suite
        summaries.
        """
        while True:
            try:
                # Wait for task time out to dump stacks.
                ret = win32event.WaitForSingleObject(event_handle, win32event.INFINITE)
                if ret != win32event.WAIT_OBJECT_0:
                    logger.error("_handle_set_event WaitForSingleObject failed: %d", ret)
                    return
            except win32event.error as err:
                # NOTE(review): the loop retries immediately after an error, so a failure
                # that never clears would be logged over and over - confirm this is intended.
                logger.error("Exception from win32event.WaitForSingleObject with error: %s", err)
            else:
                HANG_ANALYZER_CALLED.set()
                _dump_and_log("Dumping stacks due to signal from win32event.SetEvent")

    if not _IS_WINDOWS:
        # POSIX platforms (and Cygwin) can rely on a regular signal handler.
        signal.signal(signal.SIGUSR1, _handle_sigusr1)
        return

    # On Windows spawn a thread to wait on an event object for signal to dump stacks.
    # Create unique event_name.
    event_name = "Global\\Mongo_Python_" + str(os.getpid())
    security_attributes = None
    manual_reset = False
    initial_state = False
    try:
        task_timeout_handle = win32event.CreateEvent(
            security_attributes, manual_reset, initial_state, event_name
        )
    except win32event.error as err:
        logger.error("Exception from win32event.CreateEvent with error: %s", err)
        return

    # Register to close event object handle on exit.
    atexit.register(win32api.CloseHandle, task_timeout_handle)

    # The waiter is a daemon thread so that it never keeps the interpreter alive.
    event_handler_thread = threading.Thread(
        target=_handle_set_event,
        kwargs={"event_handle": task_timeout_handle},
        name="windows_event_handler_thread",
    )
    event_handler_thread.daemon = True
    event_handler_thread.start()
def _dump_stacks(logger, header_msg):
    """Log a single message containing the current stack of every thread.

    The message starts with ``header_msg``, followed by the thread count, a blank line and
    then, for each thread, a ``Thread <id>:`` line and its formatted stack.

    Args:
        logger: object whose ``info`` method receives the assembled message.
        header_msg: first line of the message.
    """
    frames_by_thread = sys._current_frames()  # pylint: disable=protected-access

    report = [header_msg, "Total threads: %d" % len(frames_by_thread), ""]
    for ident, frame in frames_by_thread.items():
        report.append("Thread %d:" % ident)
        report.append("".join(traceback.format_stack(frame)))

    logger.info("\n".join(report))
def _get_pids():
    """Return the PIDs of every non-Python descendant of the current resmoke process.

    Descendants are collected recursively, so processes spawned by the fixtures are included
    as well as the fixtures themselves. A descendant that exits while the list is being
    built is skipped rather than aborting the collection.

    Returns:
        list of int PIDs whose process name does not contain "python" (case-insensitive).
    """
    pids = []  # Gather fixture PIDs + any PIDs spawned by the fixtures.
    current_process = psutil.Process()
    for child in current_process.children(recursive=True):
        try:
            child_name = child.name()
        except psutil.NoSuchProcess:
            # The child went away between enumeration and inspection; there is nothing
            # left to analyze for it.
            continue
        # Don't include python processes. Per the original note, they have already been
        # signalled in the evergreen timeout section.
        if "python" not in child_name.lower():
            pids.append(child.pid)
    return pids
def _analyze_pids(logger, pids):
    """Run the hang analyzer over ``pids``, or merely record and kill them in selftest mode.

    Args:
        logger: logger used for progress messages and handed to the hang analyzer parser.
        pids: iterable of process ids spawned by the current resmoke process.
    """
    # If 'test_analysis' is specified, we will just write the pids out to a file and kill them
    # instead of running analysis. This option will only be specified in resmoke selftests.
    if "test_analysis" in config.INTERNAL_PARAMS:
        pid_listing_path = os.path.join(config.DBPATH_PREFIX, "test_analysis.txt")
        with open(pid_listing_path, "w") as pid_listing:
            pid_listing.write("\n".join(str(pid) for pid in pids))
            for pid in pids:
                try:
                    target = psutil.Process(pid)
                    logger.info("Killing process pid %d", pid)
                    target.kill()
                except psutil.NoSuchProcess:
                    # Process has already terminated.
                    continue
        return

    # See hang-analyzer argument options here:
    # https://github.com/10gen/mongo/blob/8636ede10bd70b32ff4b6cd115132ab0f22b89c7/buildscripts/resmokelib/hang_analyzer/hang_analyzer.py#L245
    comma_separated_pids = ",".join(map(str, pids))
    analyzer_argv = [
        "hang-analyzer",
        "-c",
        "-o",
        "file",
        "-o",
        "stdout",
        "-k",
        "-d",
        comma_separated_pids,
    ]
    analyzer = parser.parse_command_line(analyzer_argv, logger=logger)

    # Evergreen has a 15 minute timeout for task timeout commands
    # Limit the hang analyzer to 12 minutes so there is time for other tasks.
    if config.EVERGREEN_TASK_ID:
        join_timeout_secs = 60 * 12
        logger.info(
            "Limit the resmoke invoked hang analyzer to 12 minutes so there is time for resmoke to finish up."
        )
    else:
        # Outside Evergreen, wait for the analyzer for as long as it takes.
        join_timeout_secs = None

    analyzer_worker = threading.Thread(target=analyzer.execute, daemon=True)
    analyzer_worker.start()
    analyzer_worker.join(join_timeout_secs)

    if not analyzer_worker.is_alive():
        logger.info("Done running resmoke invoked hang analyzer thread.")
        return

    # The join timed out: the analyzer is still running in its daemon thread.
    logger.warning(
        "Resmoke invoked hang analyzer thread did not finish, but will continue running in the background. The thread may be disruputed and may show extraneous output."
    )
    logger.warning("Cleaning up resmoke child processes so that resmoke can fail gracefully.")
    analyzer.kill_rogue_processes()