forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
julia_threads.h
377 lines (339 loc) · 12.7 KB
/
julia_threads.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
// This file is a part of Julia. License is MIT: https://julialang.org/license
// Meant to be included in <julia.h>
#ifndef JL_THREADS_H
#define JL_THREADS_H
#include "julia_atomics.h"
// threading ------------------------------------------------------------------
#ifdef __cplusplus
extern "C" {
#endif
// Exported accessor returning a thread id as an int16_t (the same type as the
// `tid` field of jl_tls_states_t).
// NOTE(review): presumably the id of the calling thread -- confirm in the implementation.
JL_DLLEXPORT int16_t jl_threadid(void);
// Exported entry point; takes no arguments and returns nothing.
// NOTE(review): the implementation is not visible here; from the name it
// presumably reports threading profile data -- confirm.
JL_DLLEXPORT void jl_threading_profile(void);
// JULIA_ENABLE_THREADING may be controlled by altering JULIA_THREADS in Make.user
//
// JULIA_DEBUG_SLEEPWAKE(x) wraps debug-only code for the scheduler's
// sleep/wake logic.
// When running into scheduler issues, this may help provide information on the
// sequence of events that led to the issue. Normally, it is empty: the active
// definition discards its argument, so wrapped code is compiled out.
// To enable the wrapped code, swap the active definition for the commented-out
// one, which expands to `x` unchanged.
// jl_tls_states_t uses it to conditionally declare extra uint64_t fields.
//#define JULIA_DEBUG_SLEEPWAKE(x) x
#define JULIA_DEBUG_SLEEPWAKE(x)
// Options for task switching algorithm (in order of preference):
// JL_HAVE_ASM -- mostly setjmp
// JL_HAVE_ASM && JL_HAVE_UNW_CONTEXT -- libunwind-based
// JL_HAVE_UNW_CONTEXT -- libunwind-based
// JL_HAVE_ASYNCIFY -- task switching based on the binary asyncify transform
// JL_HAVE_UCONTEXT -- posix standard API, requires syscall for resume
// JL_HAVE_SIGALTSTACK -- requires several syscalls for start, setjmp for resume
//
// This section picks one of the options above (unless one was already defined
// before this point) and defines two types to match:
//   jl_stack_context_t -- win32_ucontext_t on Windows, otherwise a struct
//                         wrapping a jl_jmp_buf
//   _jl_ucontext_t     -- the context type of the selected option
#ifdef _OS_WINDOWS_
// Windows: JL_HAVE_UCONTEXT is forced and both types alias win32_ucontext_t.
// <ucontext.h> is not included on this path.
#define JL_HAVE_UCONTEXT
typedef win32_ucontext_t jl_stack_context_t;
typedef jl_stack_context_t _jl_ucontext_t;
#else
// Non-Windows: the stack context is a thin wrapper around a jl_jmp_buf.
typedef struct {
jl_jmp_buf uc_mcontext;
} jl_stack_context_t;
// Choose a default only when no option was selected externally.
#if !defined(JL_HAVE_UCONTEXT) && \
!defined(JL_HAVE_ASM) && \
!defined(JL_HAVE_UNW_CONTEXT) && \
!defined(JL_HAVE_SIGALTSTACK) && \
!defined(JL_HAVE_ASYNCIFY)
// The CPU families listed here default to JL_HAVE_ASM.
#if (defined(_CPU_X86_64_) || defined(_CPU_X86_) || defined(_CPU_AARCH64_) || \
defined(_CPU_ARM_) || defined(_CPU_PPC64_))
#define JL_HAVE_ASM
#endif
// The `#if 0` arm is never taken. It only anchors the `#elif` chain so the
// commented-out alternatives below can be re-enabled by uncommenting them.
#if 0
// very slow, but more debugging
//#elif defined(_OS_DARWIN_)
//#define JL_HAVE_UNW_CONTEXT
//#elif defined(_OS_LINUX_)
//#define JL_HAVE_UNW_CONTEXT
#elif defined(_OS_EMSCRIPTEN_)
#define JL_HAVE_ASYNCIFY
#elif !defined(JL_HAVE_ASM)
// Last resort when JL_HAVE_ASM was not selected above.
#define JL_HAVE_UNW_CONTEXT // optimistically?
#endif
#endif
// ASM without libunwind, or sigaltstack: the context is the jmp_buf wrapper.
#if (!defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM)) || defined(JL_HAVE_SIGALTSTACK)
typedef jl_stack_context_t _jl_ucontext_t;
#endif
// Asyncify: the context is a pair of stack bounds. Building with TSAN enabled
// is rejected at compile time.
#if defined(JL_HAVE_ASYNCIFY)
#if defined(_COMPILER_TSAN_ENABLED_)
#error TSAN not currently supported with asyncify
#endif
typedef struct {
// This is the extent of the asyncify stack, but because the top of the
// asyncify stack (stacktop) is also the bottom of the C stack, we can
// reuse stacktop for both. N.B.: This matches the layout of the
// __asyncify_data struct.
void *stackbottom;
void *stacktop;
} _jl_ucontext_t;
#endif
// libunwind: the context is a unw_context_t. UNW_LOCAL_ONLY is defined before
// <libunwind.h> is included.
#if defined(JL_HAVE_UNW_CONTEXT)
#define UNW_LOCAL_ONLY
#include <libunwind.h>
typedef unw_context_t _jl_ucontext_t;
#endif
// POSIX ucontext: the context is a ucontext_t.
#if defined(JL_HAVE_UCONTEXT)
#include <ucontext.h>
typedef ucontext_t _jl_ucontext_t;
#endif
#endif
// Saved execution context.
// `ctx` and `copy_ctx` share storage through an anonymous union, so only one
// of them is meaningful at a time.
// NOTE(review): `copy_ctx` is presumably used for copied stacks (compare the
// `copy_stack_ctx` member of jl_tls_states_t) -- confirm against the task code.
typedef struct {
union {
_jl_ucontext_t ctx;
jl_stack_context_t copy_ctx;
};
#if defined(_COMPILER_TSAN_ENABLED_)
// Present only in ThreadSanitizer builds; opaque pointer.
void *tsan_state;
#endif
} jl_ucontext_t;
// handle to reference an OS thread
// DWORD on Windows, pthread_t elsewhere. This is the return type of
// jl_thread_self() and the type of the `system_id` field of jl_tls_states_t.
#ifdef _OS_WINDOWS_
typedef DWORD jl_thread_t;
#else
typedef pthread_t jl_thread_t;
#endif
// Forward declaration: tasks are only referenced by pointer in this header.
struct _jl_task_t;
// Recursive spin lock
typedef struct {
// Task that currently holds the lock, accessed atomically.
// NOTE(review): presumably NULL when unlocked -- confirm in the lock code.
_Atomic(struct _jl_task_t*) owner;
// NOTE(review): presumably the recursion depth held by `owner` -- confirm.
uint32_t count;
} jl_mutex_t;
// One GC pool of free objects.
// jl_thread_heap_t holds an array of these (`norm_pools`).
typedef struct {
jl_taggedvalue_t *freelist; // root of list of free objects
jl_taggedvalue_t *newpages; // root of list of chunks of free objects
uint16_t osize; // size of objects in this pool
} jl_gc_pool_t;
// Per-thread GC counters (stored as `gc_num` in jl_tls_states_t).
// Every field is atomic. `allocd` and `freed` are signed 64-bit; the remaining
// fields are unsigned 64-bit.
// NOTE(review): units are not visible here -- presumably `allocd`/`freed` are
// byte counts and the others count calls of the named kind; confirm in the GC.
typedef struct {
_Atomic(int64_t) allocd;
_Atomic(int64_t) freed;
_Atomic(uint64_t) malloc;
_Atomic(uint64_t) realloc;
_Atomic(uint64_t) poolalloc;
_Atomic(uint64_t) bigalloc;
_Atomic(uint64_t) freecall;
} jl_thread_gc_num_t;
// Per-thread GC heap state (stored as `heap` in jl_tls_states_t, where it is
// noted that the field is very large and its offset is baked into codegen).
// Do not reorder or resize fields without updating everything that depends on
// the layout.
typedef struct {
// variable for tracking weak references
arraylist_t weak_refs;
// live tasks started on this thread
// that are holding onto a stack from the pool
arraylist_t live_tasks;
// variables for tracking malloc'd arrays
struct _mallocarray_t *mallocarrays;
struct _mallocarray_t *mafreelist;
// variables for tracking big objects
struct _bigval_t *big_objects;
// variables for tracking "remembered set"
arraylist_t rem_bindings;
arraylist_t _remset[2]; // contains jl_value_t*
// lower bound of the number of pointers inside remembered values
int remset_nptr;
// NOTE(review): `remset` and `last_remset` presumably each point at one of the
// two `_remset` entries above -- confirm in the GC.
arraylist_t *remset;
arraylist_t *last_remset;
// variables for allocating objects from pools
// The pool count depends on the platform: 49 when _P64 is defined, 50 when
// MAX_ALIGN == 8, and 51 otherwise. Being a preprocessor macro, JL_GC_N_POOLS
// stays visible to all code after this point, not only inside the struct.
#ifdef _P64
# define JL_GC_N_POOLS 49
#elif MAX_ALIGN == 8
# define JL_GC_N_POOLS 50
#else
# define JL_GC_N_POOLS 51
#endif
jl_gc_pool_t norm_pools[JL_GC_N_POOLS];
// Number of entries in `free_stacks`.
// NOTE(review): presumably one list of free stacks per stack-size class -- confirm.
#define JL_N_STACK_POOLS 16
arraylist_t free_stacks[JL_N_STACK_POOLS];
} jl_thread_heap_t;
// Cache of thread local change to global metadata during GC
// This is sync'd after marking.
// NOTE(review): this comment sits directly above the forward declaration, but
// its wording reads like a description of jl_gc_mark_cache_t -- confirm placement.
//
// Forward declaration; the union is only used through pointers in this header.
typedef union _jl_gc_mark_data jl_gc_mark_data_t;
// Position in the GC mark stacks (stored as `gc_mark_sp` in jl_tls_states_t).
// Tracks the current position in both the pc stack and the data stack, and
// caches the bounds of the pc stack held in the mark cache.
typedef struct {
void **pc; // Current stack address for the pc (up growing)
jl_gc_mark_data_t *data; // Current stack address for the data (up growing)
void **pc_start; // Cached value of `gc_cache->pc_stack`
void **pc_end; // Cached value of `gc_cache->pc_stack_end`
} jl_gc_mark_sp_t;
// Per-thread GC mark cache (stored as `gc_cache` in jl_tls_states_t).
// Holds thread-local increments of the scanned-byte counters, a bounded queue
// of big objects, and the storage for the pc and data mark stacks.
typedef struct {
// thread local increment of `perm_scanned_bytes`
size_t perm_scanned_bytes;
// thread local increment of `scanned_bytes`
size_t scanned_bytes;
// Number of queued big objects (<= 1024)
size_t nbig_obj;
// Array of queued big objects to be moved between the young list
// and the old list.
// A set low bit means that the object should be moved from the old list
// to the young list (`mark_reset_age`).
// Objects can only be put into this list when the mark bit is flipped to
// `1` (atomically). Combined with the sync after marking,
// this makes sure that a single object can only appear once in
// the lists (the mark bit cannot be flipped to `0` without sweeping)
void *big_obj[1024];
// NOTE(review): presumably guards the mark stacks below -- confirm in the GC.
uv_mutex_t stack_lock;
// Bounds of the pc stack; jl_gc_mark_sp_t caches these two values as
// `pc_start` and `pc_end`.
void **pc_stack;
void **pc_stack_end;
// Base of the data stack.
jl_gc_mark_data_t *data_stack;
} jl_gc_mark_cache_t;
// Forward declaration: backtrace elements are only referenced by pointer here.
struct _jl_bt_element_t;
// This includes all the thread local states we care about for a thread.
// Changes to TLS field types must be reflected in codegen.
// Capacity of the backtrace buffer `bt_data` below, which is documented as
// JL_MAX_BT_SIZE + 1 elements long.
#define JL_MAX_BT_SIZE 80000
typedef struct _jl_tls_states_t {
// Thread id (same type as the return value of jl_threadid()).
int16_t tid;
// NOTE(review): presumably the seed of a per-thread RNG -- confirm.
uint64_t rngseed;
// Address loaded by the safepoint macros: jl_gc_safepoint_ reads
// `*safepoint` and jl_sigint_safepoint reads `safepoint[-1]`.
volatile size_t *safepoint;
_Atomic(int8_t) sleep_check_state; // read/write from foreign threads
// Whether it is safe to execute GC at the same time.
// gc_state = 0 is the state set by jl_gc_unsafe_enter(); the two non-zero
// states are named below.
#define JL_GC_STATE_WAITING 1
// gc_state = 1 means the thread is doing GC or is waiting for the GC to
// finish.
#define JL_GC_STATE_SAFE 2
// gc_state = 2 means the thread is running unmanaged code that can be
// executed at the same time as the GC.
_Atomic(int8_t) gc_state; // read from foreign threads
// execution of certain impure
// statements is prohibited from certain
// callbacks (such as generated functions)
// as it may make compilation undecidable
int8_t in_pure_callback;
// NOTE(review): presumably non-zero while a finalizer runs on this thread -- confirm.
int8_t in_finalizer;
// NOTE(review): presumably non-zero while GC is disabled for this thread -- confirm.
int8_t disable_gc;
// Counter to disable finalizer **on the current thread**
int finalizers_inhibited;
jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen
jl_thread_gc_num_t gc_num;
// NOTE(review): presumably the mutex/condition pair used to put this thread
// to sleep and wake it (see jl_wakeup_thread) -- confirm in the scheduler.
uv_mutex_t sleep_lock;
uv_cond_t wake_signal;
volatile sig_atomic_t defer_signal;
// Task pointers for this thread. `current_task` is atomic; the others are
// plain pointers.
_Atomic(struct _jl_task_t*) current_task;
struct _jl_task_t *next_task;
struct _jl_task_t *previous_task;
struct _jl_task_t *root_task;
struct _jl_timing_block_t *timing_stack;
// NOTE(review): presumably the base address and size of this thread's stack -- confirm.
void *stackbase;
size_t stacksize;
// The two members share storage (anonymous union), mirroring the union
// inside jl_ucontext_t.
union {
_jl_ucontext_t base_ctx; // base context of stack
// This hack is needed to support always_copy_stacks:
jl_stack_context_t copy_stack_ctx;
};
// Temp storage for exception thrown in signal handler. Not rooted.
struct _jl_value_t *sig_exception;
// Temporary backtrace buffer. Scanned for gc roots when bt_size > 0.
struct _jl_bt_element_t *bt_data; // JL_MAX_BT_SIZE + 1 elements long
size_t bt_size; // Size for backtrace in transit in bt_data
// Atomically set by the sender, reset by the handler.
volatile _Atomic(sig_atomic_t) signal_request; // TODO: no actual reason for this to be _Atomic
// Allow the sigint to be raised asynchronously
// this is limited to the few places we do synchronous IO
// we can make this more general (similar to defer_signal) if necessary
volatile sig_atomic_t io_wait;
// Platform-specific signal state: a flag on Windows, a pointer elsewhere.
#ifdef _OS_WINDOWS_
int needs_resetstkoflw;
#else
void *signal_stack;
#endif
// OS-level handle for this thread (see jl_thread_t / jl_thread_self()).
jl_thread_t system_id;
arraylist_t finalizers;
jl_gc_mark_cache_t gc_cache;
arraylist_t sweep_objs;
jl_gc_mark_sp_t gc_mark_sp;
// Saved exception for previous *external* API call or NULL if cleared.
// Access via jl_exception_occurred().
struct _jl_value_t *previous_exception;
// currently-held locks, to be released when an exception is thrown
small_arraylist_t locks;
// Extra fields that exist only when JULIA_DEBUG_SLEEPWAKE is defined to
// expand to its argument. With the default empty definition they are
// compiled out, which changes the struct size.
JULIA_DEBUG_SLEEPWAKE(
uint64_t uv_run_enter;
uint64_t uv_run_leave;
uint64_t sleep_enter;
uint64_t sleep_leave;
)
} jl_tls_states_t;
// Pointer to a thread's TLS state; this is the `ptls` argument taken by the
// GC-state helpers declared later in this header.
typedef jl_tls_states_t *jl_ptls_t;
// Update codegen version in `ccall.cpp` after changing either `pause` or `wake`
//
// jl_cpu_pause() / jl_cpu_wake(): per-architecture spin-wait hint and its
// matching wake-up. JL_CPU_WAKE_NOOP is 1 wherever jl_cpu_wake() expands to a
// no-op and 0 where it emits an instruction.
//   __MIC__            : pause = _mm_delay_64(100),  wake = no-op
//   x86 / x86-64       : pause = _mm_pause(),        wake = no-op
//   AArch64 / ARM >= v7: pause = `wfe`,              wake = `sev`
//   anything else      : both are no-ops
#ifdef __MIC__
# define jl_cpu_pause() _mm_delay_64(100)
# define jl_cpu_wake() ((void)0)
# define JL_CPU_WAKE_NOOP 1
#elif defined(_CPU_X86_64_) || defined(_CPU_X86_) /* !__MIC__ */
# define jl_cpu_pause() _mm_pause()
# define jl_cpu_wake() ((void)0)
# define JL_CPU_WAKE_NOOP 1
#elif defined(_CPU_AARCH64_) || (defined(_CPU_ARM_) && __ARM_ARCH >= 7)
# define jl_cpu_pause() __asm__ volatile ("wfe" ::: "memory")
# define jl_cpu_wake() __asm__ volatile ("sev" ::: "memory")
# define JL_CPU_WAKE_NOOP 0
#else
# define jl_cpu_pause() ((void)0)
# define jl_cpu_wake() ((void)0)
# define JL_CPU_WAKE_NOOP 1
#endif
// The names are parenthesized so the function-like macros above are NOT
// expanded here. These lines declare real exported functions that share the
// macros' names.
JL_DLLEXPORT void (jl_cpu_pause)(void);
JL_DLLEXPORT void (jl_cpu_wake)(void);
// When building under the clang GC analyzer, the two safepoints are declared
// as ordinary functions rather than macros. No JL_NOTSAFEPOINT annotation is
// applied to them.
#ifdef __clang_gcanalyzer__
// Note that the sigint safepoint can also trigger GC, albeit less likely
void jl_gc_safepoint_(jl_ptls_t tls);
void jl_sigint_safepoint(jl_ptls_t tls);
#else
// gc safepoint and gc states
//
// jl_gc_safepoint_(ptls): GC safepoint for the thread whose TLS is `ptls`.
// Performs a single load from `*ptls->safepoint`, bracketed by
// jl_signal_fence() calls.
// This triggers a SegFault when we are in GC.
// The loaded value is assigned to a variable to make sure the compiler emits
// the load and to avoid the Clang warning -Wunused-volatile-lvalue.
//
// `ptls` is evaluated exactly once and is parenthesized in the expansion, so
// any pointer-valued expression (e.g. `&tls`, `cond ? a : b`) may be passed.
#define jl_gc_safepoint_(ptls) do { \
        jl_signal_fence(); \
        size_t safepoint_load = *(ptls)->safepoint; \
        jl_signal_fence(); \
        (void)safepoint_load; \
    } while (0)
// jl_sigint_safepoint(ptls): safepoint used for SIGINT delivery.
// Performs a single load from the slot immediately before the GC safepoint
// address (`ptls->safepoint[-1]`), bracketed by jl_signal_fence() calls. The
// loaded value is kept in a local and discarded so the compiler emits the load.
//
// `ptls` is evaluated exactly once and is parenthesized in the expansion, so
// any pointer-valued expression (e.g. `&tls`, `cond ? a : b`) may be passed.
#define jl_sigint_safepoint(ptls) do { \
        jl_signal_fence(); \
        size_t safepoint_load = (ptls)->safepoint[-1]; \
        jl_signal_fence(); \
        (void)safepoint_load; \
    } while (0)
#endif
// Publish a new GC state for the thread owning `ptls`.
//
// ptls      - thread-local state whose `gc_state` is updated
// state     - new GC state, written with release ordering
// old_state - the state the caller is transitioning from
//
// Returns `old_state` unchanged, so callers can save it and restore it later.
STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state,
                                     int8_t old_state)
{
    // Going from a GC-safe region (non-zero state) back to a non GC-safe
    // region (state 0) requires passing through a safepoint.
    const int leaving_safe_region = (old_state != 0) && (state == 0);

    jl_atomic_store_release(&ptls->gc_state, state);
    if (leaving_safe_region) {
        jl_gc_safepoint_(ptls);
    }
    return old_state;
}
// Switch the thread owning `ptls` to GC state `state`.
// The current state is read with a relaxed load and handed to
// jl_gc_state_set() as the old state. Returns that previous state so the
// caller can restore it later.
STATIC_INLINE int8_t jl_gc_state_save_and_set(jl_ptls_t ptls,
                                              int8_t state)
{
    int8_t previous = jl_atomic_load_relaxed(&ptls->gc_state);
    return jl_gc_state_set(ptls, state, previous);
}
// Enter/leave helpers for GC-unsafe (state 0) and GC-safe (JL_GC_STATE_SAFE)
// regions.
//   jl_gc_unsafe_enter(ptls)        - set gc_state to 0; yields the previous state
//   jl_gc_unsafe_leave(ptls, state) - restore `state`; the old state passed is 0,
//                                     so jl_gc_state_set() runs no safepoint
//   jl_gc_safe_enter(ptls)          - set gc_state to JL_GC_STATE_SAFE; yields the
//                                     previous state
//   jl_gc_safe_leave(ptls, state)   - restore `state`; the old state passed is
//                                     JL_GC_STATE_SAFE, so a safepoint runs when
//                                     `state` is 0
// Under the clang GC analyzer these are declared as functions so that the
// safepoint annotations are visible to it. There the leave functions are
// declared to return int8_t, whereas the macro forms below evaluate to void.
#ifdef __clang_gcanalyzer__
int8_t jl_gc_unsafe_enter(jl_ptls_t ptls); // Can be a safepoint
int8_t jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT;
int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT;
int8_t jl_gc_safe_leave(jl_ptls_t ptls, int8_t state); // Can be a safepoint
#else
#define jl_gc_unsafe_enter(ptls) jl_gc_state_save_and_set(ptls, 0)
#define jl_gc_unsafe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), 0))
#define jl_gc_safe_enter(ptls) jl_gc_state_save_and_set(ptls, JL_GC_STATE_SAFE)
#define jl_gc_safe_leave(ptls, state) ((void)jl_gc_state_set(ptls, (state), JL_GC_STATE_SAFE))
#endif
// Exported GC safepoint entry point. The name is parenthesized so that a
// function-like macro of the same name, if one is defined, is not expanded here.
// NOTE(review): no such macro is visible in this header -- confirm where it lives.
JL_DLLEXPORT void (jl_gc_safepoint)(void);
// Finalizer control.
// NOTE(review): semantics are not visible in this header. Presumably `on`
// enables (non-zero) or inhibits (zero) finalizers for task `ct`, and the
// `_internal` variants adjust the same inhibition without a task argument -- confirm.
JL_DLLEXPORT void jl_gc_enable_finalizers(struct _jl_task_t *ct, int on);
JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void);
JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void);
JL_DLLEXPORT void jl_gc_run_pending_finalizers(struct _jl_task_t *ct);
// Global atomic flag, defined elsewhere.
// NOTE(review): presumably non-zero while finalizers are waiting to run -- confirm.
extern JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers;
// Takes a thread id of the same type as the `tid` field of jl_tls_states_t.
// NOTE(review): presumably wakes that thread if it is sleeping -- confirm.
JL_DLLEXPORT void jl_wakeup_thread(int16_t tid);
// Return the OS-level handle of the calling thread: the result of
// GetCurrentThreadId() on Windows, pthread_self() elsewhere.
// Copied from libuv. Add `JL_CONST_FUNC` so that the compiler
// can optimize this better.
static inline jl_thread_t JL_CONST_FUNC jl_thread_self(void)
{
    jl_thread_t self;
#ifdef _OS_WINDOWS_
    self = GetCurrentThreadId();
#else
    self = pthread_self();
#endif
    return self;
}
#ifdef __cplusplus
}
#endif
#endif