Skip to content

Commit

Permalink
[mono] retry mono_threads_pthread_kill if result == EAGAIN on Linux (dotnet#33966)
Browse files Browse the repository at this point in the history

* [mono] retry mono_threads_pthread_kill if result == EAGAIN on Linux

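Try to address dotnet#32377 (signal queue overflow): when sending the signal fails with EAGAIN on Linux, sleep for 10 milliseconds and retry, up to 5 times, before treating the failure as fatal.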
  • Loading branch information
lambdageek authored Mar 24, 2020
1 parent eff3797 commit c67efd8
Showing 1 changed file with 29 additions and 5 deletions.
34 changes: 29 additions & 5 deletions src/mono/mono/utils/mono-threads-posix.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,16 +170,25 @@ mono_threads_pthread_kill (MonoThreadInfo *info, int signum)
{
THREADS_SUSPEND_DEBUG ("sending signal %d to %p[%p]\n", signum, info, mono_thread_info_get_tid (info));

const int signal_queue_ovf_retry_count G_GNUC_UNUSED = 5;
const gulong signal_queue_ovf_sleep_us G_GNUC_UNUSED = 10 * 1000; /* 10 milliseconds */
int retry_count G_GNUC_UNUSED = 0;
int result;

#if defined (__linux__)
redo:
#endif

#ifdef USE_TKILL_ON_ANDROID
int old_errno = errno;
{
int old_errno = errno;

result = tkill (info->native_handle, signum);
result = tkill (info->native_handle, signum);

if (result < 0) {
result = errno;
mono_set_errno (old_errno);
if (result < 0) {
result = errno;
mono_set_errno (old_errno);
}
}
#elif defined (HAVE_PTHREAD_KILL)
result = pthread_kill (mono_thread_info_get_tid (info), signum);
Expand All @@ -204,10 +213,25 @@ mono_threads_pthread_kill (MonoThreadInfo *info, int signum)
result != ESRCH
#if defined (__MACH__) && defined (ENOTSUP)
&& result != ENOTSUP
#endif
#if defined (__linux__)
&& !(result == EAGAIN && retry_count < signal_queue_ovf_retry_count)
#endif
)
g_error ("%s: pthread_kill failed with error %d - potential kernel OOM or signal queue overflow", __func__, result);

#if defined (__linux__)
if (result == EAGAIN && retry_count < signal_queue_ovf_retry_count) {
/* HACK: if the signal queue overflows on linux, try again a couple of times.
* Tries to address https://github.com/dotnet/runtime/issues/32377
*/
g_warning ("%s: pthread_kill failed with error %d - potential kernel OOM or signal queue overflow, sleeping for %ld microseconds", __func__, result, signal_queue_ovf_sleep_us);
g_usleep (signal_queue_ovf_sleep_us);
++retry_count;
goto redo;
}
#endif

return result;
}

Expand Down

0 comments on commit c67efd8

Please sign in to comment.