Skip to content

Commit f382a18

Browse files
invertego authored and LukeUsher committed
libco: update Windows thread information block
Update the stack base and limit in the thread information block on each cothread switch. This is required to support kernel stack unwinding, which is used for both C++ exception dispatch and setjmp/longjmp on Windows x64 and ARM64. Add a cothread entry routine that consistently aborts when the user-supplied routine returns. Debugger stack backtraces now stop consistently at this entry routine.
1 parent f63cad6 commit f382a18

File tree

3 files changed

+92
-50
lines changed

3 files changed

+92
-50
lines changed

libco/aarch64.c

+21-3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ static const uint32_t co_swap_function[1024] = {
4444
0x6d49340c, /* ldp d12,d13,[x0,144] */
4545
0x6d0a3c2e, /* stp d14,d15,[x1,160] */
4646
0x6d4a3c0e, /* ldp d14,d15,[x0,160] */
47+
#if defined(_WIN32) && !defined(LIBCO_NO_TIB)
48+
0xa940c650, /* ldp x16,x17,[x18, 8] */
49+
0xa90b4430, /* stp x16,x17,[x1,176] */
50+
0xa94b4410, /* ldp x16,x17,[x0,176] */
51+
0xa900c650, /* stp x16,x17,[x18, 8] */
52+
#endif
4753
0xd61f03c0, /* br x30 */
4854
};
4955

@@ -72,6 +78,13 @@ static void co_init() {
7278
}
7379
#endif
7480

81+
/* Common entry routine for derived cothreads: co_derive stores its address in
   the link-register slot (handle[1]) and the user routine in handle[2].
   Calls the user routine and aborts if that routine ever returns.
   NOTE(review): handle is presumably the cothread being switched to, still
   held in x0 when the swap code branches here -- confirm against co_switch. */
static void co_entrypoint(cothread_t handle) {
82+
uintptr_t* buffer = (uintptr_t*)handle;
83+
void (*entrypoint)(void) = (void (*)(void))buffer[2]; /* slot 2: user entry point written by co_derive */
84+
entrypoint();
85+
abort(); /* called only if cothread_t entrypoint returns */
86+
}
87+
7588
cothread_t co_active() {
7689
if(!co_active_handle) co_active_handle = &co_active_buffer;
7790
return co_active_handle;
@@ -88,9 +101,14 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void))
88101
if(handle = (uintptr_t*)memory) {
89102
unsigned int offset = (size & ~15);
90103
uintptr_t* p = (uintptr_t*)((unsigned char*)handle + offset);
91-
handle[0] = (uintptr_t)p; /* x16 (stack pointer) */
92-
handle[1] = (uintptr_t)entrypoint; /* x30 (link register) */
93-
handle[12] = (uintptr_t)p; /* x29 (frame pointer) */
104+
handle[0] = (uintptr_t)p; /* x16 (stack pointer) */
105+
handle[1] = (uintptr_t)co_entrypoint; /* x30 (link register) */
106+
handle[2] = (uintptr_t)entrypoint; /* x19 (entry point) */
107+
handle[12] = (uintptr_t)p; /* x29 (frame pointer) */
108+
#if defined(_WIN32) && !defined(LIBCO_NO_TIB)
109+
handle[22] = (uintptr_t)handle + size; /* stack base */
110+
handle[23] = (uintptr_t)handle; /* stack limit */
111+
#endif
94112
}
95113

96114
return handle;

libco/amd64.c

+66-47
Original file line numberDiff line numberDiff line change
@@ -21,52 +21,60 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
2121
#ifdef _WIN32
2222
/* ABI: Win64 */
2323
static const unsigned char co_swap_function[4096] = {
24-
0x48, 0x89, 0x22, /* mov [rdx],rsp */
25-
0x48, 0x8b, 0x21, /* mov rsp,[rcx] */
26-
0x58, /* pop rax */
27-
0x48, 0x89, 0x6a, 0x08, /* mov [rdx+ 8],rbp */
28-
0x48, 0x89, 0x72, 0x10, /* mov [rdx+16],rsi */
29-
0x48, 0x89, 0x7a, 0x18, /* mov [rdx+24],rdi */
30-
0x48, 0x89, 0x5a, 0x20, /* mov [rdx+32],rbx */
31-
0x4c, 0x89, 0x62, 0x28, /* mov [rdx+40],r12 */
32-
0x4c, 0x89, 0x6a, 0x30, /* mov [rdx+48],r13 */
33-
0x4c, 0x89, 0x72, 0x38, /* mov [rdx+56],r14 */
34-
0x4c, 0x89, 0x7a, 0x40, /* mov [rdx+64],r15 */
24+
0x48, 0x89, 0x22, /* mov [rdx],rsp */
25+
0x48, 0x8b, 0x21, /* mov rsp,[rcx] */
26+
0x58, /* pop rax */
27+
0x48, 0x83, 0xe9, 0x80, /* sub rcx,-0x80 */
28+
0x48, 0x83, 0xea, 0x80, /* sub rdx,-0x80 */
29+
0x48, 0x89, 0x6a, 0x88, /* mov [rdx-0x78],rbp */
30+
0x48, 0x89, 0x72, 0x90, /* mov [rdx-0x70],rsi */
31+
0x48, 0x89, 0x7a, 0x98, /* mov [rdx-0x68],rdi */
32+
0x48, 0x89, 0x5a, 0xa0, /* mov [rdx-0x60],rbx */
33+
0x4c, 0x89, 0x62, 0xa8, /* mov [rdx-0x58],r12 */
34+
0x4c, 0x89, 0x6a, 0xb0, /* mov [rdx-0x50],r13 */
35+
0x4c, 0x89, 0x72, 0xb8, /* mov [rdx-0x48],r14 */
36+
0x4c, 0x89, 0x7a, 0xc0, /* mov [rdx-0x40],r15 */
3537
#if !defined(LIBCO_NO_SSE)
36-
0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+ 80],xmm6 */
37-
0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+ 96],xmm7 */
38-
0x44, 0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+112],xmm8 */
39-
0x48, 0x83, 0xc2, 0x70, /* add rdx,112 */
40-
0x44, 0x0f, 0x29, 0x4a, 0x10, /* movaps [rdx+ 16],xmm9 */
41-
0x44, 0x0f, 0x29, 0x52, 0x20, /* movaps [rdx+ 32],xmm10 */
42-
0x44, 0x0f, 0x29, 0x5a, 0x30, /* movaps [rdx+ 48],xmm11 */
43-
0x44, 0x0f, 0x29, 0x62, 0x40, /* movaps [rdx+ 64],xmm12 */
44-
0x44, 0x0f, 0x29, 0x6a, 0x50, /* movaps [rdx+ 80],xmm13 */
45-
0x44, 0x0f, 0x29, 0x72, 0x60, /* movaps [rdx+ 96],xmm14 */
46-
0x44, 0x0f, 0x29, 0x7a, 0x70, /* movaps [rdx+112],xmm15 */
38+
0x0f, 0x29, 0x72, 0xd0, /* movaps [rdx-0x30],xmm6 */
39+
0x0f, 0x29, 0x7a, 0xe0, /* movaps [rdx-0x20],xmm7 */
40+
0x44, 0x0f, 0x29, 0x42, 0xf0, /* movaps [rdx-0x10],xmm8 */
41+
0x44, 0x0f, 0x29, 0x0a, /* movaps [rdx], xmm9 */
42+
0x44, 0x0f, 0x29, 0x52, 0x10, /* movaps [rdx+0x10],xmm10 */
43+
0x44, 0x0f, 0x29, 0x5a, 0x20, /* movaps [rdx+0x20],xmm11 */
44+
0x44, 0x0f, 0x29, 0x62, 0x30, /* movaps [rdx+0x30],xmm12 */
45+
0x44, 0x0f, 0x29, 0x6a, 0x40, /* movaps [rdx+0x40],xmm13 */
46+
0x44, 0x0f, 0x29, 0x72, 0x50, /* movaps [rdx+0x50],xmm14 */
47+
0x44, 0x0f, 0x29, 0x7a, 0x60, /* movaps [rdx+0x60],xmm15 */
4748
#endif
48-
0x48, 0x8b, 0x69, 0x08, /* mov rbp,[rcx+ 8] */
49-
0x48, 0x8b, 0x71, 0x10, /* mov rsi,[rcx+16] */
50-
0x48, 0x8b, 0x79, 0x18, /* mov rdi,[rcx+24] */
51-
0x48, 0x8b, 0x59, 0x20, /* mov rbx,[rcx+32] */
52-
0x4c, 0x8b, 0x61, 0x28, /* mov r12,[rcx+40] */
53-
0x4c, 0x8b, 0x69, 0x30, /* mov r13,[rcx+48] */
54-
0x4c, 0x8b, 0x71, 0x38, /* mov r14,[rcx+56] */
55-
0x4c, 0x8b, 0x79, 0x40, /* mov r15,[rcx+64] */
49+
0x48, 0x8b, 0x69, 0x88, /* mov rbp,[rcx-0x78] */
50+
0x48, 0x8b, 0x71, 0x90, /* mov rsi,[rcx-0x70] */
51+
0x48, 0x8b, 0x79, 0x98, /* mov rdi,[rcx-0x68] */
52+
0x48, 0x8b, 0x59, 0xa0, /* mov rbx,[rcx-0x60] */
53+
0x4c, 0x8b, 0x61, 0xa8, /* mov r12,[rcx-0x58] */
54+
0x4c, 0x8b, 0x69, 0xb0, /* mov r13,[rcx-0x50] */
55+
0x4c, 0x8b, 0x71, 0xb8, /* mov r14,[rcx-0x48] */
56+
0x4c, 0x8b, 0x79, 0xc0, /* mov r15,[rcx-0x40] */
5657
#if !defined(LIBCO_NO_SSE)
57-
0x0f, 0x28, 0x71, 0x50, /* movaps xmm6, [rcx+ 80] */
58-
0x0f, 0x28, 0x79, 0x60, /* movaps xmm7, [rcx+ 96] */
59-
0x44, 0x0f, 0x28, 0x41, 0x70, /* movaps xmm8, [rcx+112] */
60-
0x48, 0x83, 0xc1, 0x70, /* add rcx,112 */
61-
0x44, 0x0f, 0x28, 0x49, 0x10, /* movaps xmm9, [rcx+ 16] */
62-
0x44, 0x0f, 0x28, 0x51, 0x20, /* movaps xmm10,[rcx+ 32] */
63-
0x44, 0x0f, 0x28, 0x59, 0x30, /* movaps xmm11,[rcx+ 48] */
64-
0x44, 0x0f, 0x28, 0x61, 0x40, /* movaps xmm12,[rcx+ 64] */
65-
0x44, 0x0f, 0x28, 0x69, 0x50, /* movaps xmm13,[rcx+ 80] */
66-
0x44, 0x0f, 0x28, 0x71, 0x60, /* movaps xmm14,[rcx+ 96] */
67-
0x44, 0x0f, 0x28, 0x79, 0x70, /* movaps xmm15,[rcx+112] */
58+
0x0f, 0x28, 0x71, 0xd0, /* movaps xmm6, [rcx-0x30] */
59+
0x0f, 0x28, 0x79, 0xe0, /* movaps xmm7, [rcx-0x20] */
60+
0x44, 0x0f, 0x28, 0x41, 0xf0, /* movaps xmm8, [rcx-0x10] */
61+
0x44, 0x0f, 0x28, 0x09, /* movaps xmm9, [rcx] */
62+
0x44, 0x0f, 0x28, 0x51, 0x10, /* movaps xmm10,[rcx+0x10] */
63+
0x44, 0x0f, 0x28, 0x59, 0x20, /* movaps xmm11,[rcx+0x20] */
64+
0x44, 0x0f, 0x28, 0x61, 0x30, /* movaps xmm12,[rcx+0x30] */
65+
0x44, 0x0f, 0x28, 0x69, 0x40, /* movaps xmm13,[rcx+0x40] */
66+
0x44, 0x0f, 0x28, 0x71, 0x50, /* movaps xmm14,[rcx+0x50] */
67+
0x44, 0x0f, 0x28, 0x79, 0x60, /* movaps xmm15,[rcx+0x60] */
6868
#endif
69-
0xff, 0xe0, /* jmp rax */
69+
#if !defined(LIBCO_NO_TIB)
70+
0x65, 0x4c, 0x8b, 0x04, 0x25, /* mov r8,gs:0x30 */
71+
0x30, 0x00, 0x00, 0x00,
72+
0x41, 0x0f, 0x10, 0x40, 0x08, /* movups xmm0,[r8+0x8] */
73+
0x0f, 0x29, 0x42, 0x70, /* movaps [rdx+0x70],xmm0 */
74+
0x0f, 0x28, 0x41, 0x70, /* movaps xmm0,[rcx+0x70] */
75+
0x41, 0x0f, 0x11, 0x40, 0x08, /* movups [r8+0x8],xmm0 */
76+
#endif
77+
0xff, 0xe0, /* jmp rax */
7078
};
7179

7280
#include <windows.h>
@@ -113,8 +121,14 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
113121
}
114122
#endif
115123

116-
static void crash() {
117-
assert(0); /* called only if cothread_t entrypoint returns */
124+
/* Common entry routine for derived cothreads: co_derive pushes its address
   onto the new stack and stores the user routine in slot 1 of the handle.
   Calls the user routine and aborts if that routine ever returns.
   NOTE(review): handle is presumably the first argument register left over
   from the swap routine -- confirm against co_switch. */
static void co_entrypoint(cothread_t handle) {
125+
long long* buffer = (long long*)handle;
126+
#ifdef _WIN32
127+
/* the Win64 swap code runs "sub rcx,-0x80" before jumping here, so the
   incoming pointer is 0x80 bytes (16 eight-byte slots) past the handle */
buffer -= 16;
128+
#endif
129+
void (*entrypoint)(void) = (void (*)(void))buffer[1]; /* slot 1: user entry point written by co_derive */
130+
entrypoint();
131+
abort(); /* called only if cothread_t entrypoint returns */
118132
}
119133

120134
cothread_t co_active() {
@@ -133,9 +147,14 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void))
133147
if(handle = (cothread_t)memory) {
134148
unsigned int offset = (size & ~15) - 32;
135149
long long *p = (long long*)((char*)handle + offset); /* seek to top of stack */
136-
*--p = (long long)crash; /* crash if entrypoint returns */
137-
*--p = (long long)entrypoint; /* start of function */
138-
*(long long*)handle = (long long)p; /* stack pointer */
150+
*--p = (long long)0; /* crash if entrypoint returns */
151+
*--p = (long long)co_entrypoint;
152+
((long long*)handle)[0] = (long long)p; /* stack pointer */
153+
((long long*)handle)[1] = (long long)entrypoint; /* start of function */
154+
#if defined(_WIN32) && !defined(LIBCO_NO_TIB)
155+
((long long*)handle)[30] = (long long)handle + size; /* stack base */
156+
((long long*)handle)[31] = (long long)handle; /* stack limit */
157+
#endif
139158
}
140159

141160
return handle;

libco/settings.h

+5
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
do not use this unless you are certain your application won't use SSE */
1111
/* #define LIBCO_NO_SSE */
1212

13+
/*[amd64, aarch64]:
14+
Win64 only: omits the thread information block update, which provides a small speed-up but breaks stack unwinding;
15+
do not define this if your application uses exceptions or setjmp/longjmp */
16+
/* #define LIBCO_NO_TIB */
17+
1318
#if defined(LIBCO_C)
1419
#if defined(LIBCO_MP)
1520
#define thread_local __thread

0 commit comments

Comments
 (0)