@@ -21,52 +21,60 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
21
21
#ifdef _WIN32
22
22
/* ABI: Win64 */
23
23
/*
 * Hand-assembled context switch kept as raw machine-code bytes; the comment
 * on each row is the disassembly of that row. Rows marked "-" are the
 * previous encoding, rows marked "+" are the replacement.
 *
 * Flow of the "+" version: store rsp into [rdx], load rsp from [rcx], pop the
 * resume address into rax, save rbp/rsi/rdi/rbx/r12-r15 (plus xmm6-xmm15
 * unless LIBCO_NO_SSE is defined) into the rdx buffer, reload the same set
 * from the rcx buffer, exchange 16 bytes with [gs:0x30]+8 (unless
 * LIBCO_NO_TIB is defined), then jmp rax.
 *
 * Buffer layout implied by the displacements, in bytes from the start of a
 * buffer: 0x00 rsp, 0x08-0x40 integer registers, 0x50-0xe0 xmm6-xmm15,
 * 0xf0-0xff the 16 bytes exchanged with [gs:0x30]+8.
 * movaps faults on operands that are not 16-byte aligned, so both buffers
 * must be 16-byte aligned.
 */
static const unsigned char co_swap_function [4096 ] = {
24
- 0x48 , 0x89 , 0x22 , /* mov [rdx],rsp */
25
- 0x48 , 0x8b , 0x21 , /* mov rsp,[rcx] */
26
- 0x58 , /* pop rax */
27
- 0x48 , 0x89 , 0x6a , 0x08 , /* mov [rdx+ 8],rbp */
28
- 0x48 , 0x89 , 0x72 , 0x10 , /* mov [rdx+16],rsi */
29
- 0x48 , 0x89 , 0x7a , 0x18 , /* mov [rdx+24],rdi */
30
- 0x48 , 0x89 , 0x5a , 0x20 , /* mov [rdx+32],rbx */
31
- 0x4c , 0x89 , 0x62 , 0x28 , /* mov [rdx+40],r12 */
32
- 0x4c , 0x89 , 0x6a , 0x30 , /* mov [rdx+48],r13 */
33
- 0x4c , 0x89 , 0x72 , 0x38 , /* mov [rdx+56],r14 */
34
- 0x4c , 0x89 , 0x7a , 0x40 , /* mov [rdx+64],r15 */
24
+ 0x48 , 0x89 , 0x22 , /* mov [rdx],rsp ; park the current stack pointer in slot 0 of the rdx buffer */
25
+ 0x48 , 0x8b , 0x21 , /* mov rsp,[rcx] ; adopt the stack pointer saved in the rcx buffer */
26
+ 0x58 , /* pop rax ; resume address taken from the new stack, consumed by the final jmp rax */
27
+ 0x48 , 0x83 , 0xe9 , 0x80 , /* sub rcx,-0x80 ; bias by +128 (as imm8 -128) so offsets 0x08..0xf0 become disp8 -0x78..+0x70 */
28
+ 0x48 , 0x83 , 0xea , 0x80 , /* sub rdx,-0x80 ; same +128 bias for the buffer being saved into */
29
+ 0x48 , 0x89 , 0x6a , 0x88 , /* mov [rdx-0x78],rbp */
30
+ 0x48 , 0x89 , 0x72 , 0x90 , /* mov [rdx-0x70],rsi */
31
+ 0x48 , 0x89 , 0x7a , 0x98 , /* mov [rdx-0x68],rdi */
32
+ 0x48 , 0x89 , 0x5a , 0xa0 , /* mov [rdx-0x60],rbx */
33
+ 0x4c , 0x89 , 0x62 , 0xa8 , /* mov [rdx-0x58],r12 */
34
+ 0x4c , 0x89 , 0x6a , 0xb0 , /* mov [rdx-0x50],r13 */
35
+ 0x4c , 0x89 , 0x72 , 0xb8 , /* mov [rdx-0x48],r14 */
36
+ 0x4c , 0x89 , 0x7a , 0xc0 , /* mov [rdx-0x40],r15 */
35
37
#if !defined (LIBCO_NO_SSE )
36
- 0x0f , 0x29 , 0x72 , 0x50 , /* movaps [rdx+ 80],xmm6 */
37
- 0x0f , 0x29 , 0x7a , 0x60 , /* movaps [rdx+ 96],xmm7 */
38
- 0x44 , 0x0f , 0x29 , 0x42 , 0x70 , /* movaps [rdx+112],xmm8 */
39
- 0x48 , 0x83 , 0xc2 , 0x70 , /* add rdx,112 */
40
- 0x44 , 0x0f , 0x29 , 0x4a , 0x10 , /* movaps [rdx+ 16],xmm9 */
41
- 0x44 , 0x0f , 0x29 , 0x52 , 0x20 , /* movaps [rdx+ 32],xmm10 */
42
- 0x44 , 0x0f , 0x29 , 0x5a , 0x30 , /* movaps [rdx+ 48],xmm11 */
43
- 0x44 , 0x0f , 0x29 , 0x62 , 0x40 , /* movaps [rdx+ 64],xmm12 */
44
- 0x44 , 0x0f , 0x29 , 0x6a , 0x50 , /* movaps [rdx+ 80],xmm13 */
45
- 0x44 , 0x0f , 0x29 , 0x72 , 0x60 , /* movaps [rdx+ 96],xmm14 */
46
- 0x44 , 0x0f , 0x29 , 0x7a , 0x70 , /* movaps [rdx+112],xmm15 */
38
+ 0x0f , 0x29 , 0x72 , 0xd0 , /* movaps [rdx-0x30],xmm6 */
39
+ 0x0f , 0x29 , 0x7a , 0xe0 , /* movaps [rdx-0x20],xmm7 */
40
+ 0x44 , 0x0f , 0x29 , 0x42 , 0xf0 , /* movaps [rdx-0x10],xmm8 */
41
+ 0x44 , 0x0f , 0x29 , 0x0a , /* movaps [rdx], xmm9 */
42
+ 0x44 , 0x0f , 0x29 , 0x52 , 0x10 , /* movaps [rdx+0x10],xmm10 */
43
+ 0x44 , 0x0f , 0x29 , 0x5a , 0x20 , /* movaps [rdx+0x20],xmm11 */
44
+ 0x44 , 0x0f , 0x29 , 0x62 , 0x30 , /* movaps [rdx+0x30],xmm12 */
45
+ 0x44 , 0x0f , 0x29 , 0x6a , 0x40 , /* movaps [rdx+0x40],xmm13 */
46
+ 0x44 , 0x0f , 0x29 , 0x72 , 0x50 , /* movaps [rdx+0x50],xmm14 */
47
+ 0x44 , 0x0f , 0x29 , 0x7a , 0x60 , /* movaps [rdx+0x60],xmm15 */
47
48
#endif
48
- 0x48 , 0x8b , 0x69 , 0x08 , /* mov rbp,[rcx+ 8] */
49
- 0x48 , 0x8b , 0x71 , 0x10 , /* mov rsi,[rcx+16] */
50
- 0x48 , 0x8b , 0x79 , 0x18 , /* mov rdi,[rcx+24] */
51
- 0x48 , 0x8b , 0x59 , 0x20 , /* mov rbx,[rcx+32] */
52
- 0x4c , 0x8b , 0x61 , 0x28 , /* mov r12,[rcx+40] */
53
- 0x4c , 0x8b , 0x69 , 0x30 , /* mov r13,[rcx+48] */
54
- 0x4c , 0x8b , 0x71 , 0x38 , /* mov r14,[rcx+56] */
55
- 0x4c , 0x8b , 0x79 , 0x40 , /* mov r15,[rcx+64] */
49
+ 0x48 , 0x8b , 0x69 , 0x88 , /* mov rbp,[rcx-0x78] */
50
+ 0x48 , 0x8b , 0x71 , 0x90 , /* mov rsi,[rcx-0x70] */
51
+ 0x48 , 0x8b , 0x79 , 0x98 , /* mov rdi,[rcx-0x68] */
52
+ 0x48 , 0x8b , 0x59 , 0xa0 , /* mov rbx,[rcx-0x60] */
53
+ 0x4c , 0x8b , 0x61 , 0xa8 , /* mov r12,[rcx-0x58] */
54
+ 0x4c , 0x8b , 0x69 , 0xb0 , /* mov r13,[rcx-0x50] */
55
+ 0x4c , 0x8b , 0x71 , 0xb8 , /* mov r14,[rcx-0x48] */
56
+ 0x4c , 0x8b , 0x79 , 0xc0 , /* mov r15,[rcx-0x40] */
56
57
#if !defined (LIBCO_NO_SSE )
57
- 0x0f , 0x28 , 0x71 , 0x50 , /* movaps xmm6, [rcx+ 80] */
58
- 0x0f , 0x28 , 0x79 , 0x60 , /* movaps xmm7, [rcx+ 96] */
59
- 0x44 , 0x0f , 0x28 , 0x41 , 0x70 , /* movaps xmm8, [rcx+112] */
60
- 0x48 , 0x83 , 0xc1 , 0x70 , /* add rcx,112 */
61
- 0x44 , 0x0f , 0x28 , 0x49 , 0x10 , /* movaps xmm9, [rcx+ 16] */
62
- 0x44 , 0x0f , 0x28 , 0x51 , 0x20 , /* movaps xmm10,[rcx+ 32] */
63
- 0x44 , 0x0f , 0x28 , 0x59 , 0x30 , /* movaps xmm11,[rcx+ 48] */
64
- 0x44 , 0x0f , 0x28 , 0x61 , 0x40 , /* movaps xmm12,[rcx+ 64] */
65
- 0x44 , 0x0f , 0x28 , 0x69 , 0x50 , /* movaps xmm13,[rcx+ 80] */
66
- 0x44 , 0x0f , 0x28 , 0x71 , 0x60 , /* movaps xmm14,[rcx+ 96] */
67
- 0x44 , 0x0f , 0x28 , 0x79 , 0x70 , /* movaps xmm15,[rcx+112] */
58
+ 0x0f , 0x28 , 0x71 , 0xd0 , /* movaps xmm6, [rcx-0x30] */
59
+ 0x0f , 0x28 , 0x79 , 0xe0 , /* movaps xmm7, [rcx-0x20] */
60
+ 0x44 , 0x0f , 0x28 , 0x41 , 0xf0 , /* movaps xmm8, [rcx-0x10] */
61
+ 0x44 , 0x0f , 0x28 , 0x09 , /* movaps xmm9, [rcx] */
62
+ 0x44 , 0x0f , 0x28 , 0x51 , 0x10 , /* movaps xmm10,[rcx+0x10] */
63
+ 0x44 , 0x0f , 0x28 , 0x59 , 0x20 , /* movaps xmm11,[rcx+0x20] */
64
+ 0x44 , 0x0f , 0x28 , 0x61 , 0x30 , /* movaps xmm12,[rcx+0x30] */
65
+ 0x44 , 0x0f , 0x28 , 0x69 , 0x40 , /* movaps xmm13,[rcx+0x40] */
66
+ 0x44 , 0x0f , 0x28 , 0x71 , 0x50 , /* movaps xmm14,[rcx+0x50] */
67
+ 0x44 , 0x0f , 0x28 , 0x79 , 0x60 , /* movaps xmm15,[rcx+0x60] */
68
68
#endif
69
- 0xff , 0xe0 , /* jmp rax */
69
+ #if !defined (LIBCO_NO_TIB )
70
+ 0x65 , 0x4c , 0x8b , 0x04 , 0x25 , /* mov r8,gs:0x30 ; presumably the thread information block pointer, given the LIBCO_NO_TIB guard */
71
+ 0x30 , 0x00 , 0x00 , 0x00 , /* disp32 0x00000030 belonging to the mov above */
72
+ 0x41 , 0x0f , 0x10 , 0x40 , 0x08 , /* movups xmm0,[r8+0x8] ; read the live 16 bytes at r8+8 (co_derive seeds the matching slot with stack base + stack limit) */
73
+ 0x0f , 0x29 , 0x42 , 0x70 , /* movaps [rdx+0x70],xmm0 ; keep them at offset 0xf0 of the buffer being saved */
74
+ 0x0f , 0x28 , 0x41 , 0x70 , /* movaps xmm0,[rcx+0x70] ; fetch the pair stored for the buffer being resumed */
75
+ 0x41 , 0x0f , 0x11 , 0x40 , 0x08 , /* movups [r8+0x8],xmm0 ; install that pair at r8+8 */
76
+ #endif
77
+ 0xff , 0xe0 , /* jmp rax ; continue at the address popped from the new stack */
70
78
};
71
79
72
80
#include <windows.h>
@@ -113,8 +121,14 @@ static void (*co_swap)(cothread_t, cothread_t) = 0;
113
121
}
114
122
#endif
115
123
116
- static void crash () {
117
- assert (0 ); /* called only if cothread_t entrypoint returns */
124
+ static void co_entrypoint (cothread_t handle ) { /* trampoline: co_derive places this address on a new cothread's stack, so the first switch into it lands here */
125
+ long long * buffer = (long long * )handle ; /* view the context buffer as 64-bit slots */
126
+ #ifdef _WIN32
127
+ buffer -= 16 ; /* the Win64 co_swap leaves rcx advanced by 0x80 (sub rcx,-0x80); 16 slots * 8 bytes steps back to the buffer start */
128
+ #endif
129
+ void (* entrypoint )(void ) = (void (* )(void ))buffer [1 ]; /* slot 1 is where co_derive stored the user entrypoint; the Win64 co_swap later reuses that offset to save rbp, so it is read before the first switch away */
130
+ entrypoint ();
131
+ abort (); /* reached only if the cothread's entrypoint returns; there is no caller to return to, so terminate the process */
118
132
}
119
133
120
134
cothread_t co_active () {
@@ -133,9 +147,14 @@ cothread_t co_derive(void* memory, unsigned int size, void (*entrypoint)(void))
133
147
if (handle = (cothread_t )memory ) {
134
148
unsigned int offset = (size & ~15 ) - 32 ;
135
149
long long * p = (long long * )((char * )handle + offset ); /* seek to top of stack */
136
- * -- p = (long long )crash ; /* crash if entrypoint returns */
137
- * -- p = (long long )entrypoint ; /* start of function */
138
- * (long long * )handle = (long long )p ; /* stack pointer */
150
+ * -- p = (long long )0 ; /* crash if entrypoint returns */
151
+ * -- p = (long long )co_entrypoint ;
152
+ ((long long * )handle )[0 ] = (long long )p ; /* stack pointer */
153
+ ((long long * )handle )[1 ] = (long long )entrypoint ; /* start of function */
154
+ #if defined(_WIN32 ) && !defined(LIBCO_NO_TIB )
155
+ ((long long * )handle )[30 ] = (long long )handle + size ; /* stack base */
156
+ ((long long * )handle )[31 ] = (long long )handle ; /* stack limit */
157
+ #endif
139
158
}
140
159
141
160
return handle ;
0 commit comments