forked from JuliaLang/julia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
abi_aarch64.cpp
377 lines (357 loc) · 15.3 KB
/
abi_aarch64.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
// This file is a part of Julia. License is MIT: https://julialang.org/license
//===----------------------------------------------------------------------===//
//
// The ABI implementation used for AArch64 targets.
//
//===----------------------------------------------------------------------===//
//
// The Procedure Call Standard can be found here:
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
//
//===----------------------------------------------------------------------===//
// AAPCS64 ABI layout: classifies Julia datatypes for AArch64 argument and
// return-value passing — whether a value travels in SIMD/FP registers,
// general-purpose registers, or is passed by reference — and, where needed,
// the LLVM type the argument must be rewritten to.
struct ABI_AArch64Layout : AbiLayout {
    // Return the normalized LLVM short-vector type (<2 x i32> for 8 bytes,
    // <4 x i32> for 16 bytes) if `dt` is a homogeneous aggregate of
    // `VecElement` fields; nullptr otherwise.
    Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
    {
        // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
        // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
        if (dt->layout == NULL || jl_is_layout_opaque(dt->layout))
            return nullptr;
        size_t nfields = dt->layout->nfields;
        assert(nfields > 0);
        if (nfields < 2)
            return nullptr;
        Type *lltype;
        // Short vector should be either 8 bytes or 16 bytes.
        // Note that there are only two distinct fundamental types for
        // short vectors so we normalize them to <2 x i32> and <4 x i32>
        switch (jl_datatype_size(dt)) {
        case 8:
            lltype = FixedVectorType::get(Type::getInt32Ty(ctx), 2);
            break;
        case 16:
            lltype = FixedVectorType::get(Type::getInt32Ty(ctx), 4);
            break;
        default:
            return nullptr;
        }
        // Since `dt` is pointer free and has no padding and is 8 or 16 in size,
        // `ft0` must be concrete, immutable with no padding and we don't need
        // to check if its size is legal since it is included in
        // the homogeneity check.
        jl_datatype_t *ft0 = (jl_datatype_t*)jl_field_type(dt, 0);
        // `ft0` should be a `VecElement` type and the true element type
        // should be a primitive type
        if (ft0->name != jl_vecelement_typename ||
            ((jl_datatype_t*)jl_field_type(ft0, 0))->layout->nfields)
            return nullptr;
        // Every remaining field must be exactly the same VecElement type.
        for (size_t i = 1; i < nfields; i++) {
            if (jl_field_type(dt, i) != (jl_value_t*)ft0) {
                // Not homogeneous
                return nullptr;
            }
        }
        return lltype;
    }

// NOTE: an unscoped preprocessor macro, not a member — it remains defined for
// the rest of the translation unit. Uses subtyping against `AbstractFloat` so
// it also matches user-defined primitive float types of the right size.
#define jl_is_floattype(v) jl_subtype(v,(jl_value_t*)jl_floatingpoint_type)

    // Map a floating-point bitstype to the LLVM scalar FP type of the same
    // size (half/float/double/fp128); nullptr for any other type or size.
    Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
    {
        // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
        // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
        Type *lltype;
        // Check size first since it's cheaper.
        switch (jl_datatype_size(dt)) {
        case 2:
            lltype = Type::getHalfTy(ctx);
            break;
        case 4:
            lltype = Type::getFloatTy(ctx);
            break;
        case 8:
            lltype = Type::getDoubleTy(ctx);
            break;
        case 16:
            lltype = Type::getFP128Ty(ctx);
            break;
        default:
            return nullptr;
        }
        // `jl_floatingpoint_type` may be NULL during bootstrap; guard it
        // before the subtype query.
        return ((jl_floatingpoint_type && jl_is_floattype((jl_value_t*)dt)) ?
                lltype : nullptr);
    }

    // Combined check: the LLVM FP scalar type (for 0-field bitstypes) or the
    // normalized short-vector type (for aggregates), or nullptr if neither.
    Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
    {
        // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
        if (dt->name->mutabl || dt->layout->npointers || dt->layout->haspadding)
            return nullptr;
        return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx);
    }

    // Accumulator for the HFA/HVA walk: the single fundamental element type
    // seen so far and its byte size (both unset until the first leaf).
    struct ElementType {
        Type *type;
        size_t sz;
        ElementType() : type(nullptr), sz(0) {};
    };

    // Whether a type is a homogeneous floating-point aggregates (HFA) or a
    // homogeneous short-vector aggregates (HVA). Returns the element type.
    // An Homogeneous Aggregate is a Composite Type where all of the Fundamental
    // Data Types of the members that compose the type are the same.
    // Note that it is the fundamental types that are important and not the member
    // types.
    // `nele` and `ele` are in/out accumulators shared across the recursion:
    // `nele` counts fundamental members found so far, `ele` pins the one
    // element type/size they must all share.
    bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele, LLVMContext &ctx) const
    {
        // Assume:
        // dt is a pointerfree type, (all members are isbits)
        // dsz == jl_datatype_size(dt) > 0
        // 0 <= nele <= 3
        // dt has no padding
        // We ignore zero sized member here. This isn't really consistent with
        // GCC for zero-sized array members. GCC seems to treat structs with
        // zero sized array members as non-HFA and non-HVA. Clang (3.7 and 3.8)
        // handles this slightly differently.
        // Ref https://llvm.org/bugs/show_bug.cgi?id=26162
        while (size_t nfields = jl_datatype_nfields(dt)) {
            // For composite types, find the first non zero sized member
            size_t i;
            size_t fieldsz;
            for (i = 0; i < nfields; i++) {
                if ((fieldsz = jl_field_size(dt, i))) {
                    break;
                }
            }
            assert(i < nfields);
            // If there's only one non zero sized member, try again on this member
            if (fieldsz == dsz) {
                dt = (jl_datatype_t*)jl_field_type(dt, i);
                continue;
            }
            // A short-vector aggregate counts as one fundamental member; it
            // must agree with any element type/size recorded so far.
            if (Type *vectype = get_llvm_vectype(dt, ctx)) {
                if ((ele.sz && dsz != ele.sz) || (ele.type && ele.type != vectype))
                    return false;
                ele.type = vectype;
                ele.sz = dsz;
                nele++;
                return true;
            }
            // Otherwise, process each members
            for (; i < nfields; i++) {
                size_t fieldsz = jl_field_size(dt, i);
                if (fieldsz == 0)
                    continue;
                jl_datatype_t *fieldtype = (jl_datatype_t*)jl_field_type(dt, i);
                // Check element count.
                // This needs to be done after the zero size member check
                if (nele > 3 || !isHFAorHVA(fieldtype, fieldsz, nele, ele, ctx)) {
                    return false;
                }
            }
            return true;
        }
        // For bitstypes
        if (ele.sz && dsz != ele.sz)
            return false;
        Type *new_type = get_llvm_fptype(dt, ctx);
        if (new_type && (!ele.type || ele.type == new_type)) {
            ele.type = new_type;
            ele.sz = dsz;
            nele++;
            return true;
        }
        return false;
    }

    // Entry point of the HFA/HVA check: returns the fundamental element type
    // (and the member count via `nele`) if `dt` qualifies, NULL otherwise.
    Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const
    {
        // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
        // An Homogeneous Floating-point Aggregate (HFA) is an Homogeneous Aggregate
        // with a Fundamental Data Type that is a Floating-Point type and at most
        // four uniquely addressable members.
        // An Homogeneous Short-Vector Aggregate (HVA) is an Homogeneous Aggregate
        // with a Fundamental Data Type that is a Short-Vector type and at most four
        // uniquely addressable members.
        // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector)
        size_t dsz = jl_datatype_size(dt);
        if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->haspadding)
            return NULL;
        nele = 0;
        ElementType eltype;
        if (isHFAorHVA(dt, dsz, nele, eltype, ctx))
            return eltype.type;
        return NULL;
    }

    // AAPCS64 B.2/B.3: decide whether the argument must be copied to
    // caller-allocated memory and passed as a pointer.
    bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx) override
    {
        // B.2
        // If the argument type is an HFA or an HVA, then the argument is used
        // unmodified.
        // NOTE(review): despite its name, `size` receives the HFA/HVA member
        // count from isHFAorHVA; the value itself is unused here.
        size_t size;
        if (isHFAorHVA(dt, size, ctx))
            return false;
        // B.3
        // If the argument type is a Composite Type that is larger than 16 bytes,
        // then the argument is copied to memory allocated by the caller and the
        // argument is replaced by a pointer to the copy.
        // We only check for the total size and not whether it is a composite type
        // since there's no corresponding C type and we just treat such large
        // bitstype as a composite type of the right size.
        return jl_datatype_size(dt) > 16;
        // B.4
        // If the argument type is a Composite Type then the size of the argument
        // is rounded up to the nearest multiple of 8 bytes.
    }

    // Determine which kind of register the argument will be passed in and
    // if the argument has to be passed on stack (including by reference).
    //
    // If the argument should be passed in SIMD and floating-point registers,
    // we may need to rewrite the argument types to [n x ftype].
    // If the argument should be passed in general purpose registers, we may need
    // to rewrite the argument types to [n x i64].
    //
    // If the argument has to be passed on stack, we need to use sret.
    //
    // All the out parameters should be default to `false`.
    // Returns the element type for a [rewrite_len x eltype] rewrite, or NULL
    // when no rewrite is needed (or when passed on stack / in one register).
    Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
                       size_t *rewrite_len, LLVMContext &ctx) const
    {
        // Based on section 5.4 C of the Procedure Call Standard
        // C.1
        // If the argument is a Half-, Single-, Double- or Quad- precision
        // Floating-point or Short Vector Type and the NSRN is less than 8, then
        // the argument is allocated to the least significant bits of register
        // v[NSRN]. The NSRN is incremented by one. The argument has now been
        // allocated.
        if (get_llvm_fp_or_vectype(dt, ctx)) {
            *fpreg = true;
            return NULL;
        }
        // C.2
        // If the argument is an HFA or an HVA and there are sufficient
        // unallocated SIMD and Floating-point registers (NSRN + number of
        // members <= 8), then the argument is allocated to SIMD and
        // Floating-point Registers (with one register per member of the HFA
        // or HVA). The NSRN is incremented by the number of registers used.
        // The argument has now been allocated.
        if (Type *eltype = isHFAorHVA(dt, *rewrite_len, ctx)) {
            assert(*rewrite_len > 0 && *rewrite_len <= 4);
            // HFA and HVA have <= 4 members
            *fpreg = true;
            // Rewrite to [n x eltype] where n is the number of fundamental types.
            return eltype;
        }
        // Check if the argument needs to be passed by reference. This should be
        // done before starting step C but we do this here to avoid checking for
        // HFA and HVA twice. We don't check whether it is a composite type.
        // See `needPassByRef` above.
        if (jl_datatype_size(dt) > 16) {
            *onstack = true;
            return NULL;
        }
        // C.3
        // If the argument is an HFA or an HVA then the NSRN is set to 8 and the
        // size of the argument is rounded up to the nearest multiple of 8 bytes.
        // C.4
        // If the argument is an HFA, an HVA, a Quad-precision Floating-point or
        // Short Vector Type then the NSAA is rounded up to the larger of 8 or
        // the Natural Alignment of the argument’s type.
        // C.5
        // If the argument is a Half- or Single- precision Floating Point type,
        // then the size of the argument is set to 8 bytes. The effect is as if
        // the argument had been copied to the least significant bits of a 64-bit
        // register and the remaining bits filled with unspecified values.
        // C.6
        // If the argument is an HFA, an HVA, a Half-, Single-, Double- or
        // Quad- precision Floating-point or Short Vector Type, then the argument
        // is copied to memory at the adjusted NSAA. The NSAA is incremented
        // by the size of the argument. The argument has now been allocated.
        // <already included in the C.2 case above>
        // C.7
        // If the argument is an Integral or Pointer Type, the size of the
        // argument is less than or equal to 8 bytes and the NGRN is less than 8,
        // the argument is copied to the least significant bits in x[NGRN].
        // The NGRN is incremented by one. The argument has now been allocated.
        // Here we treat any bitstype of the right size as integers or pointers
        // This is needed for types like Cstring which should be treated as
        // pointers. We don't need to worry about floating points here since they
        // are handled above.
        if (jl_is_immutable(dt) && jl_datatype_nfields(dt) == 0 &&
            (jl_datatype_size(dt) == 1 || jl_datatype_size(dt) == 2 ||
             jl_datatype_size(dt) == 4 || jl_datatype_size(dt) == 8 ||
             jl_datatype_size(dt) == 16))
            return NULL;
        // C.8
        // If the argument has an alignment of 16 then the NGRN is rounded up to
        // the next even number.
        // C.9
        // If the argument is an Integral Type, the size of the argument is equal
        // to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
        // and x[NGRN+1]. x[NGRN] shall contain the lower addressed double-word
        // of the memory representation of the argument. The NGRN is incremented
        // by two. The argument has now been allocated.
        // <merged into C.7 above>
        // C.10
        // If the argument is a Composite Type and the size in double-words of
        // the argument is not more than 8 minus NGRN, then the argument is
        // copied into consecutive general-purpose registers, starting at x[NGRN].
        // The argument is passed as though it had been loaded into the registers
        // from a double-word-aligned address with an appropriate sequence of LDR
        // instructions loading consecutive registers from memory (the contents of
        // any unused parts of the registers are unspecified by this standard).
        // The NGRN is incremented by the number of registers used. The argument
        // has now been allocated.
        // We don't check for composite types here since the ones that have
        // corresponding C types are already handled and we just treat the ones
        // with weird size as a black box composite type.
        // The type can fit in 8 x 8 bytes since it is handled by
        // need_pass_by_ref otherwise.
        // 0-size types (Nothing) won't be rewritten and that is what we want
        assert(jl_datatype_size(dt) <= 16); // Should be pass by reference otherwise
        *rewrite_len = (jl_datatype_size(dt) + 7) >> 3;
        // Rewrite to [n x Int64] where n is the **size in dword**
        return jl_datatype_size(dt) ? Type::getInt64Ty(ctx) : NULL;
        // C.11
        // The NGRN is set to 8.
        // C.12
        // The NSAA is rounded up to the larger of 8 or the Natural Alignment
        // of the argument’s type.
        // C.13
        // If the argument is a composite type then the argument is copied to
        // memory at the adjusted NSAA. The NSAA is incremented by the size of
        // the argument. The argument has now been allocated.
        // <handled by C.10 above>
        // C.14
        // If the size of the argument is less than 8 bytes then the size of the
        // argument is set to 8 bytes. The effect is as if the argument was
        // copied to the least significant bits of a 64-bit register and the
        // remaining bits filled with unspecified values.
        // C.15
        // The argument is copied to memory at the adjusted NSAA. The NSAA is
        // incremented by the size of the argument. The argument has now been
        // allocated.
        // <handled by C.10 above>
    }

    // Whether a return value of type `dt` must go through an sret pointer:
    // true exactly when classify_arg says the value is passed on the stack.
    bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
    {
        // Section 5.5
        // If the type, T, of the result of a function is such that
        //
        // void func(T arg)
        //
        // would require that arg be passed as a value in a register (or set of
        // registers) according to the rules in section 5.4 Parameter Passing,
        // then the result is returned in the same registers as would be used for
        // such an argument.
        bool fpreg = false;
        bool onstack = false;
        size_t rewrite_len = 0;
        classify_arg(dt, &fpreg, &onstack, &rewrite_len, ctx);
        return onstack;
    }

    // The LLVM type to rewrite the argument to for ABI purposes: a scalar
    // FP/vector type directly, an [n x eltype] array from classify_arg, or
    // NULL when no rewrite is needed.
    Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
    {
        if (Type *fptype = get_llvm_fp_or_vectype(dt, ctx))
            return fptype;
        bool fpreg = false;
        bool onstack = false;
        size_t rewrite_len = 0;
        if (Type *rewrite_ty = classify_arg(dt, &fpreg, &onstack, &rewrite_len, ctx))
            return ArrayType::get(rewrite_ty, rewrite_len);
        return NULL;
    }
};