// This file is a part of Julia. License is MIT: https://julialang.org/license
#include <llvm/ADT/BitVector.h>
#include <llvm/ADT/PostOrderIterator.h>
#include <llvm/ADT/SetVector.h>
#include <llvm/ADT/SmallVector.h>
#include "llvm/Analysis/CFG.h"
#include <llvm/IR/Value.h>
#include <llvm/IR/Constants.h>
#include <llvm/IR/Dominators.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/IntrinsicInst.h>
#include <llvm/IR/CallSite.h>
#include <llvm/IR/MDBuilder.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Pass.h>
#include <llvm/Support/Debug.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/Transforms/Utils/ModuleUtils.h>
#include "llvm-version.h"
#include "codegen_shared.h"
#include "julia.h"
#include "julia_internal.h"
#include "julia_assert.h"
#define DEBUG_TYPE "late_lower_gcroot"
using namespace llvm;
/* Julia GC Root Placement pass. For a general overview of the design of GC
root lowering, see the devdocs. This file is the actual implementation.
The actual algorithm is fairly straightforward. First recall the goal of this
pass:
Minimize the number of needed gc roots/stores to them subject to the constraint
that at every safepoint, any live gc-tracked pointer (i.e. for which there is
a path after this point that contains a use of this pointer) is in some gc slot.
In particular, in order to understand this algorithm, it is important to
realize that the only places where rootedness matters are safepoints.
Now, the primary phases of the algorithm are:
1. Local Scan
During this step, each Basic Block is inspected and analyzed for local
properties. In particular, we want to determine the ordering of any of
the following activities:
- Any Def of a gc-tracked pointer. In general Defs are the results of
calls or loads from appropriate memory locations. Phi nodes and
selects do complicate this story slightly as described below.
- Any use of a gc-tracked or derived pointer. As described in the
devdocs, a use is in general one of
a) a load from a tracked/derived value
b) a store to a tracked/derived value
c) a store OF a tracked/derived value
d) a use of a value as a call operand (including operand bundles)
- Any safepoint
Crucially, we also perform pointer numbering during the local scan,
assigning every Def a unique integer and caching the integer for each
derived pointer. This allows us to operate only on the set of Defs (
represented by these integers) for the rest of the algorithm. We also
maintain some local utility information that is needed by later passes
(see the BBState struct for details).
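As a small illustrative sketch (the value and function names here are invented
for exposition, not taken from real IR), consider a block of the form
  %v = call %jl_value_t addrspace(10)* @allocate_something()  ; Def of a tracked pointer
  call void @some_runtime_call()                              ; safepoint
  call void @use_it(%jl_value_t addrspace(10)* %v)            ; use of the tracked pointer
The local scan records the Def of %v, the safepoint, and the use, in that
order, and assigns %v a pointer number; since the use follows the safepoint,
%v must be in a GC slot at that safepoint.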
2. Dataflow Computation
This computation operates entirely over the function's control flow graph
and does not look into a basic block. The algorithm is essentially
textbook iterative data flow for liveness computation. However, the
data flow equations are slightly more complicated because we also
forward propagate rootedness information in addition to backpropagating
liveness.
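In sketch form, the backward (liveness) half is the usual fixed-point
iteration over the BBState bit vectors (simplified; the real update in
ComputeLiveness also has to fold in PhiOuts and the forward-propagated
rootedness information):
  LiveOut(BB) = union over successors S of LiveIn(S)
  LiveIn(BB)  = UpExposedUses(BB) | (LiveOut(BB) & ~Defs(BB))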
3. Live Set Computation
With the liveness information from the previous step, we can now compute,
for every safepoint, the set of values live at that particular safepoint.
There are three pieces of information being combined here:
i. Values that needed to be live due to local analysis (e.g. there
was a def, then a safepoint, then a use). This was computed during
local analysis.
ii. Values that are live across the basic block (i.e. they are live
at every safepoint within the basic block). This relies entirely
on the liveness information.
iii. Values that are now live-out from the basic block (i.e. they are
live at every safepoint following their def). During local
analysis, we keep, for every safepoint, those values that would
be live if they were live out. Here we can check if they are
actually live-out and make the appropriate additions to the live
set.
Lastly, we also explicitly compute, for each value, the list of values
that are simultaneously live at some safepoint. This is known as an
"interference graph" and is the input to the next step.
4. GC Root coloring
Two values which are not simultaneously live at a safepoint can share the
same slot. This is an important optimization, because otherwise long
functions would have exceptionally large GC frames, reducing performance
and bloating the size of the stack. Assigning values to these slots is
equivalent to doing graph coloring on the interference graph - the graph
where nodes are values and two values have an edge if they are
simultaneously live at a safepoint - which we computed in the previous
step. Now, graph coloring in general is a hard problem. However, for SSA-form
programs (and most programs in general, by virtue of their
structure), the resulting interference graphs are chordal and can be
colored optimally in linear time by performing greedy coloring in a
perfect elimination order. Now, our interference graphs are likely not
entirely chordal due to some non-SSA corner cases. However, using the same
algorithm should still give a very good coloring while having sufficiently
low runtime.
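As a tiny worked example: if %a is live only at safepoint S1, %b only at S2,
and %c at both S1 and S2, the interference graph has edges {%a,%c} and
{%b,%c} but no edge {%a,%b}, so the greedy coloring may assign %a and %b the
same frame slot while %c gets a slot of its own.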
5. JLCall frame optimizations
Unlike earlier iterations of the gc root placement logic, jlcall frames
are no longer treated as a special case and need not necessarily be sunk
into the gc frame. Additionally, we now emit lifetime
intrinsics, so regular stack slot coloring will merge any jlcall frames
not sunk into the gc frame. Nevertheless, performing such sinking can still
be profitable. Since all arguments to a jlcall are guaranteed to be live
at that call in some gc slot, we can attempt to rearrange the slots within
the gc-frame, or re-use slots not assigned at that particular location
for the gcframe. However, even without this optimization, stack frames
are at most two times larger than optimal (because regular stack coloring
can merge the jlcall allocas).
N.B.: This step is not yet implemented.
6. Root placement
This performs the actual insertion of the GCFrame pushes/pops, zeros out
the gc frame and creates the stores to the gc frame according to the
stack slot assignment computed in the previous step. GC frame stores
are generally sunk right before the first safepoint that uses them
(this is beneficial for code where the primary path does not have
safepoints, but some other path, e.g. the error path, does). However,
if the first safepoint is not dominated by the definition (this can
happen due to the non-SSA corner cases), the store is inserted right after
the definition.
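Very roughly, the emitted shape for N roots looks like the following
simplified sketch (see PushGCFrame, PopGCFrame and PlaceGCFrameStore for the
real code):
  %gcframe = alloca %jl_value_t addrspace(10)*, i32 (N + 2)
  ; prologue: zero the N root slots, store N << 1 and the previous gc frame
  ; into the first two slots, and point the thread-local gc frame at %gcframe
  ; ... before each relevant safepoint: store the live value into its slot ...
  ; epilogue: restore the previous gc frame (pop)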
7. Cleanup
This step performs necessary cleanup before passing the IR to codegen. In
particular, it removes any calls to the pointer_from_objref intrinsic and
removes the extra operand bundles from ccalls. In the future it could
also strip the addrspace information from all values as this
information is no longer needed.
There are a couple of important special cases that deserve attention:
A. PHIs and Selects
In general PHIs and selects are treated as separate defs for the purposes
of the algorithm and their operands as uses of those values. It is
important to consider, however, WHERE the uses of a PHI's operands are
located. It is neither at the start of the basic block, because the values
do not dominate the block (so we can't really consider them live-in), nor
at the end of the predecessor (because they are actually live out).
Instead it is best to think of those uses as living on the edge between
the appropriate predecessor and the block containing the PHI.
Another concern is PHIs of derived values. Since we cannot simply root
these values by storing them to a GC slot, we need to insert a new,
artificial PHI that tracks the base pointers for the derived values. E.g.
in:
A:
%Abase = load addrspace(10) *...
%Aderived = addrspacecast %Abase to addrspace(11)
B:
%Bbase = load addrspace(10) *...
%Bderived = addrspacecast %Bbase to addrspace(11)
C:
%phi = phi [%Aderived, %A
%Bderived, %B]
we will insert another phi in C to track the relevant base pointers:
%philift = phi [%Abase, %A
%Bbase, %B]
We then pretend, for the purposes of numbering, that %phi was derived from
%philift. Note that in order to be able to do this, we need to be able to
perform this lifting either during numbering or instruction scanning.
B. Vectors of pointers/Union representations
Since this pass runs very late in the pass pipeline, it runs after the
various vectorization passes. As a result, we have to potentially deal
with vectors of gc-tracked pointers. For the purposes of most of the
algorithm, we simply assign every element of the vector a separate number
and no changes are needed. However, those parts of the algorithm that
look at IR need to be aware of the possibility of encountering vectors of
pointers.
Similarly, unions (e.g. in call returns) are represented as a struct of
a gc-tracked value and an argument selector. We simply assign a single
number to this struct and proceed as if it were a single pointer. However,
this again requires care at the IR level.
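Concretely, such a union-representation value is roughly a struct of the
form { %jl_value_t addrspace(10)*, i8 } (a tracked pointer plus the selector
byte); isUnionRep below simply matches a two-element struct whose first
field is in one of the special pointer address spaces.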
C. Non mem2reg'd allocas
Under some circumstances, allocas will still be present in the IR when
we get to this pass. We don't try very hard to handle this case, and
simply sink the alloca into the GCFrame.
*/
struct BBState {
// These do not get updated after local analysis
BitVector Defs;
BitVector PhiOuts;
//// Upward exposed uses that do not have a preceding safepoint
BitVector UpExposedUsesUnrooted;
//// All other uses
BitVector UpExposedUses;
// These get updated during dataflow
BitVector LiveIn;
BitVector LiveOut;
std::vector<int> Safepoints;
int TopmostSafepoint = -1;
bool HasSafepoint = false;
// Have we gone through this basic block in our local scan yet?
bool Done = false;
};
struct State {
Function *const F;
DominatorTree *DT;
// The maximum assigned value number
int MaxPtrNumber;
// The maximum assigned safepoint number
int MaxSafepointNumber;
// Cache of numbers assigned to IR values. This includes caching of numbers
// for derived values
std::map<Value *, int> AllPtrNumbering;
std::map<Value *, std::vector<int>> AllVectorNumbering;
// Numbering of pointers. This only includes Defs
std::map<Value *, int> PtrNumbering;
// The reverse of the previous maps
std::map<int, Value *> ReversePtrNumbering;
// Neighbors in the coloring interference graph. I.e. for each value, the
// indices of other values that are used simultaneously at some safe point.
std::vector<SetVector<int>> Neighbors;
// The result of the local analysis
std::map<BasicBlock *, BBState> BBStates;
// Refinement map. If all of the values in the mapped list are rooted
// (-1 means an externally rooted value and -2 means a globally/permanently rooted value),
// then the key is already rooted (but not the other way around).
// A value that can be refined to -2 never needs any rooting or write barrier.
// A value that can be refined to -1 doesn't need a local root but still needs a write barrier.
// At the end of `LocalScan` this map has a few properties:
// 1. Values are either < 0 or dominate the key.
// 2. Therefore this is a DAG.
std::map<int, SmallVector<int, 1>> Refinements;
// GC preserves map. All safepoints that are dominated by the map key, but not by any
// of its uses, need to preserve the values listed in the map value.
std::map<Instruction *, std::vector<int>> GCPreserves;
// The assignment of numbers to safepoints. The assigned numbers are indices
// into the per-safepoint vectors below (ReverseSafepointNumbering, LiveSets, LiveIfLiveOut)
std::map<Instruction *, int> SafepointNumbering;
// Reverse mapping index -> safepoint
std::vector<Instruction *> ReverseSafepointNumbering;
// Instructions that can return twice. For now, all values live at these
// instructions will get their own, dedicated GC frame slots, because they
// have unobservable control flow, so we can't be sure where they're
// actually live. All of these are also considered safepoints.
std::vector<Instruction *> ReturnsTwice;
// The set of values live at a particular safepoint
std::vector<BitVector> LiveSets;
// Those values that - if live out from our parent basic block - are live
// at this safepoint.
std::vector<std::vector<int>> LiveIfLiveOut;
// We don't bother doing liveness on Allocas that were not mem2reg'ed;
// they just get directly sunk into the root array.
std::vector<AllocaInst *> Allocas;
State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
};
namespace llvm {
void initializeLateLowerGCFramePass(PassRegistry &Registry);
}
extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false);
struct LateLowerGCFrame: public FunctionPass {
static char ID;
LateLowerGCFrame() : FunctionPass(ID)
{
llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
tbaa_gcframe = tbaa_make_child("jtbaa_gcframe").first;
MDNode *tbaa_data;
MDNode *tbaa_data_scalar;
std::tie(tbaa_data, tbaa_data_scalar) = tbaa_make_child("jtbaa_data");
tbaa_tag = tbaa_make_child("jtbaa_tag", tbaa_data_scalar).first;
}
protected:
void getAnalysisUsage(AnalysisUsage &AU) const override {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.setPreservesCFG();
}
private:
Type *T_prjlvalue;
Type *T_ppjlvalue;
Type *T_size;
Type *T_int8;
Type *T_int32;
Type *T_pint8;
Type *T_pjlvalue;
Type *T_pjlvalue_der;
Type *T_ppjlvalue_der;
MDNode *tbaa_gcframe;
MDNode *tbaa_tag;
Function *ptls_getter;
Function *gc_flush_func;
Function *gc_preserve_begin_func;
Function *gc_preserve_end_func;
Function *pointer_from_objref_func;
Function *alloc_obj_func;
Function *typeof_func;
Function *write_barrier_func;
Function *queueroot_func;
Function *pool_alloc_func;
Function *big_alloc_func;
CallInst *ptlsStates;
void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
void NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses);
void NoteUse(State &S, BBState &BBS, Value *V) {
NoteUse(S, BBS, V, BBS.UpExposedUses);
}
Value *MaybeExtractUnion(std::pair<Value*,int> Val, Instruction *InsertBefore);
void LiftPhi(State &S, PHINode *Phi, SmallVector<int, 16> &PHINumbers);
bool LiftSelect(State &S, SelectInst *SI);
int Number(State &S, Value *V);
std::vector<int> NumberVector(State &S, Value *Vec);
int NumberBase(State &S, Value *V, Value *Base);
std::vector<int> NumberVectorBase(State &S, Value *Base);
void NoteOperandUses(State &S, BBState &BBS, User &UI, BitVector &Uses);
void NoteOperandUses(State &S, BBState &BBS, User &UI){
NoteOperandUses(S, BBS, UI, BBS.UpExposedUses);
}
State LocalScan(Function &F);
void ComputeLiveness(State &S);
void ComputeLiveSets(State &S);
void PushGCFrame(AllocaInst *gcframe, unsigned NRoots, Instruction *InsertAfter);
void PopGCFrame(AllocaInst *gcframe, Instruction *InsertBefore);
std::vector<int> ColorRoots(const State &S);
void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame, Instruction *InsertionPoint);
void PlaceGCFrameStores(State &S, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame);
void PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
bool doInitialization(Module &M) override;
void reinitFunctions(Module &M);
bool doFinalization(Module &) override;
bool runOnFunction(Function &F) override;
Instruction *get_pgcstack(Instruction *ptlsStates);
bool CleanupIR(Function &F, State *S=nullptr);
void NoteUseChain(State &S, BBState &BBS, User *TheUser);
SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
void RefineLiveSet(BitVector &LS, State &S);
Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V);
Value *EmitLoadTag(IRBuilder<> &builder, Value *V);
};
static unsigned getValueAddrSpace(Value *V) {
Type *Ty = V->getType();
if (isa<VectorType>(Ty))
Ty = cast<VectorType>(V->getType())->getElementType();
return cast<PointerType>(Ty)->getAddressSpace();
}
static bool isSpecialPtr(Type *Ty) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy)
return false;
unsigned AS = PTy->getAddressSpace();
return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial;
}
static bool isSpecialPtrVec(Type *Ty) {
auto *VTy = dyn_cast<VectorType>(Ty);
if (!VTy)
return false;
return isSpecialPtr(VTy->getElementType());
}
static bool isUnionRep(Type *Ty) {
return Ty->isStructTy() && cast<StructType>(Ty)->getNumElements() == 2 &&
isSpecialPtr(cast<StructType>(Ty)->getTypeAtIndex((unsigned)0));
}
// If the input value is a scalar (pointer), we may return a vector value as the base,
// in which case the second member of the pair is the index of the value in the vector.
static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCache = true) {
Value *CurrentV = V;
int fld_idx = -1;
while (true) {
if (UseCache) {
if (CurrentV->getType()->isPointerTy()) {
auto it = S.AllPtrNumbering.find(CurrentV);
if (it != S.AllPtrNumbering.end())
return std::make_pair(CurrentV, fld_idx);
} else {
auto it = S.AllVectorNumbering.find(CurrentV);
if (it != S.AllVectorNumbering.end())
return std::make_pair(CurrentV, fld_idx);
}
}
if (isa<BitCastInst>(CurrentV))
CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
else if (isa<AddrSpaceCastInst>(CurrentV)) {
Value *NewV = cast<AddrSpaceCastInst>(CurrentV)->getOperand(0);
if (getValueAddrSpace(NewV) == 0)
break;
CurrentV = NewV;
}
else if (isa<GetElementPtrInst>(CurrentV)) {
CurrentV = cast<GetElementPtrInst>(CurrentV)->getOperand(0);
// GEP can make vectors from a single base pointer
if (fld_idx != -1 && !isSpecialPtrVec(CurrentV->getType())) {
fld_idx = -1;
}
} else if (isa<ExtractValueInst>(CurrentV)) {
Value *Operand = cast<ExtractValueInst>(CurrentV)->getOperand(0);
if (!isUnionRep(Operand->getType()))
break;
CurrentV = Operand;
}
else if (isa<InsertValueInst>(CurrentV)) {
if (!isUnionRep(CurrentV->getType()))
break;
InsertValueInst *IVI = cast<InsertValueInst>(CurrentV);
assert(IVI->getNumIndices() == 1);
unsigned idx = IVI->getIndices()[0];
if (idx == 0) {
// Updating the pointer in the union. Follow the pointer.
CurrentV = IVI->getOperand(1);
} else {
// Updating which tindex is active. Follow the union.
assert(idx == 1);
CurrentV = IVI->getOperand(0);
}
}
else if (auto EEI = dyn_cast<ExtractElementInst>(CurrentV)) {
assert(CurrentV->getType()->isPointerTy() && fld_idx == -1);
// For now, only support constant index.
auto IdxOp = cast<ConstantInt>(EEI->getIndexOperand());
fld_idx = IdxOp->getLimitedValue(INT_MAX);
CurrentV = EEI->getVectorOperand();
}
else if (auto LI = dyn_cast<LoadInst>(CurrentV)) {
if (auto PtrT = dyn_cast<PointerType>(LI->getType())) {
if (PtrT->getAddressSpace() == AddressSpace::Loaded) {
CurrentV = LI->getPointerOperand();
if (!isSpecialPtr(CurrentV->getType())) {
// Special case to bypass the check below.
// This could really be anything, but it's not loaded
// from a tracked pointer, so it doesn't matter what
// it is.
return std::make_pair(CurrentV, fld_idx);
}
continue;
}
}
// In general a load terminates a walk
break;
}
else {
break;
}
}
assert(isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) ||
isa<Argument>(CurrentV) || isa<SelectInst>(CurrentV) ||
isa<PHINode>(CurrentV) || isa<AddrSpaceCastInst>(CurrentV) ||
isa<Constant>(CurrentV) || isa<AllocaInst>(CurrentV) ||
isa<ExtractValueInst>(CurrentV) ||
isa<InsertElementInst>(CurrentV) ||
isa<ShuffleVectorInst>(CurrentV));
return std::make_pair(CurrentV, fld_idx);
}
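// Given the (base value, index) pair produced by FindBaseValue, materialize a plain tracked
// pointer at InsertBefore: extract field 0 if the base is a union representation, extract the
// indexed lane if it is a vector, and otherwise return the value unchanged.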
Value *LateLowerGCFrame::MaybeExtractUnion(std::pair<Value*,int> Val, Instruction *InsertBefore) {
if (isUnionRep(Val.first->getType())) {
assert(Val.second == -1);
return ExtractValueInst::Create(Val.first, {(unsigned)0}, "", InsertBefore);
}
else if (Val.second != -1) {
return ExtractElementInst::Create(Val.first, ConstantInt::get(T_int32, Val.second),
"", InsertBefore);
}
return Val.first;
}
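// Map a pointer number back to an IR value usable at InsertionPoint. If the number refers to
// one lane of a vector of tracked pointers, emit an extractelement of that lane.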
static Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertionPoint)
{
Value *Val = S.ReversePtrNumbering[Num];
if (isSpecialPtrVec(Val->getType())) {
const std::vector<int> &AllNums = S.AllVectorNumbering[Val];
unsigned Idx = 0;
for (; Idx < AllNums.size(); ++Idx) {
if ((unsigned)AllNums[Idx] == Num)
break;
}
Val = ExtractElementInst::Create(Val, ConstantInt::get(
Type::getInt32Ty(Val->getContext()), Idx), "", InsertionPoint);
}
return Val;
}
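// Lift a select over derived (non-tracked) pointers by creating a parallel "gclift" select over
// the corresponding base pointers (substituting null for an arm without a tracked base), numbering
// it, and recording the original select as derived from it. Returns false (in the scalar case) if
// neither arm has a tracked base.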
bool LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
if (isSpecialPtrVec(SI->getType())) {
VectorType *VT = cast<VectorType>(SI->getType());
std::vector<int> TrueNumbers = NumberVector(S, SI->getTrueValue());
std::vector<int> FalseNumbers = NumberVector(S, SI->getFalseValue());
std::vector<int> Numbers;
for (unsigned i = 0; i < VT->getNumElements(); ++i) {
SelectInst *LSI = SelectInst::Create(SI->getCondition(),
TrueNumbers[i] < 0 ?
ConstantPointerNull::get(cast<PointerType>(T_prjlvalue)) :
GetPtrForNumber(S, TrueNumbers[i], SI),
FalseNumbers[i] < 0 ?
ConstantPointerNull::get(cast<PointerType>(T_prjlvalue)) :
GetPtrForNumber(S, FalseNumbers[i], SI),
"gclift", SI);
int Number = ++S.MaxPtrNumber;
Numbers.push_back(Number);
S.PtrNumbering[LSI] = S.AllPtrNumbering[LSI] = Number;
S.ReversePtrNumbering[Number] = LSI;
}
S.AllVectorNumbering[SI] = Numbers;
} else {
Value *TrueBase = MaybeExtractUnion(FindBaseValue(S, SI->getTrueValue(), false), SI);
Value *FalseBase = MaybeExtractUnion(FindBaseValue(S, SI->getFalseValue(), false), SI);
if (getValueAddrSpace(TrueBase) != AddressSpace::Tracked)
TrueBase = ConstantPointerNull::get(cast<PointerType>(FalseBase->getType()));
if (getValueAddrSpace(FalseBase) != AddressSpace::Tracked)
FalseBase = ConstantPointerNull::get(cast<PointerType>(TrueBase->getType()));
if (getValueAddrSpace(TrueBase) != AddressSpace::Tracked)
return false;
Value *SelectBase = SelectInst::Create(SI->getCondition(),
TrueBase, FalseBase, "gclift", SI);
int Number = ++S.MaxPtrNumber;
S.PtrNumbering[SelectBase] = S.AllPtrNumbering[SelectBase] =
S.AllPtrNumbering[SI] = Number;
S.ReversePtrNumbering[Number] = SelectBase;
}
return true;
}
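// Analogue of LiftSelect for PHIs of derived values (special case A in the header comment):
// insert a parallel "gclift" PHI over the incoming base pointers, number it, and record the
// original PHI as derived from it. Newly assigned numbers are appended to PHINumbers so they
// can be fixed up later.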
void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi, SmallVector<int, 16> &PHINumbers)
{
if (isSpecialPtrVec(Phi->getType())) {
VectorType *VT = cast<VectorType>(Phi->getType());
std::vector<PHINode *> lifted;
for (unsigned i = 0; i < VT->getNumElements(); ++i) {
lifted.push_back(PHINode::Create(T_prjlvalue, Phi->getNumIncomingValues(), "gclift", Phi));
}
for (unsigned i = 0; i < Phi->getNumIncomingValues(); ++i) {
std::vector<int> Numbers = NumberVector(S, Phi->getIncomingValue(i));
BasicBlock *IncomingBB = Phi->getIncomingBlock(i);
Instruction *Terminator = IncomingBB->getTerminator();
for (unsigned i = 0; i < VT->getNumElements(); ++i) {
if (Numbers[i] < 0)
lifted[i]->addIncoming(ConstantPointerNull::get(cast<PointerType>(T_prjlvalue)), IncomingBB);
else
lifted[i]->addIncoming(GetPtrForNumber(S, Numbers[i], Terminator), IncomingBB);
}
}
std::vector<int> Numbers;
for (unsigned i = 0; i < VT->getNumElements(); ++i) {
int Number = ++S.MaxPtrNumber;
PHINumbers.push_back(Number);
Numbers.push_back(Number);
S.PtrNumbering[lifted[i]] = S.AllPtrNumbering[lifted[i]] = Number;
S.ReversePtrNumbering[Number] = lifted[i];
}
S.AllVectorNumbering[Phi] = Numbers;
} else {
PHINode *lift = PHINode::Create(T_prjlvalue, Phi->getNumIncomingValues(), "gclift", Phi);
for (unsigned i = 0; i < Phi->getNumIncomingValues(); ++i) {
Value *Incoming = Phi->getIncomingValue(i);
Value *Base = MaybeExtractUnion(FindBaseValue(S, Incoming, false),
Phi->getIncomingBlock(i)->getTerminator());
if (getValueAddrSpace(Base) != AddressSpace::Tracked)
Base = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
if (Base->getType() != T_prjlvalue)
Base = new BitCastInst(Base, T_prjlvalue, "", Phi->getIncomingBlock(i)->getTerminator());
lift->addIncoming(Base, Phi->getIncomingBlock(i));
}
int Number = ++S.MaxPtrNumber;
PHINumbers.push_back(Number);
S.PtrNumbering[lift] = S.AllPtrNumbering[lift] =
S.AllPtrNumbering[Phi] = Number;
S.ReversePtrNumbering[Number] = lift;
}
}
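// Assign (or look up) the pointer number for V, whose base value is CurrentV. Negative numbers
// mean no GC slot is needed: -1 for values rooted elsewhere (arguments, non-tracked allocas and
// casts, etc.) and -2 for permanently rooted constants.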
int LateLowerGCFrame::NumberBase(State &S, Value *V, Value *CurrentV)
{
auto it = S.AllPtrNumbering.find(CurrentV);
if (it != S.AllPtrNumbering.end())
return it->second;
int Number;
bool isUnion = isUnionRep(CurrentV->getType());
if (isa<Constant>(CurrentV)) {
// Perm rooted
Number = -2;
} else if (isa<Argument>(CurrentV) ||
((isa<AllocaInst>(CurrentV) || isa<AddrSpaceCastInst>(CurrentV)) &&
getValueAddrSpace(CurrentV) != AddressSpace::Tracked)) {
// We know this is rooted in the parent
Number = -1;
} else if (!isSpecialPtr(CurrentV->getType()) && !isUnion) {
// Externally rooted somehow hopefully (otherwise there's a bug in the
// input IR)
Number = -1;
} else if (isa<SelectInst>(CurrentV) && !isUnion && getValueAddrSpace(CurrentV) != AddressSpace::Tracked) {
Number = -1;
if (LiftSelect(S, cast<SelectInst>(CurrentV)))
Number = S.AllPtrNumbering[V] = S.AllPtrNumbering.at(CurrentV);
return Number;
} else if (isa<PHINode>(CurrentV) && !isUnion && getValueAddrSpace(CurrentV) != AddressSpace::Tracked) {
SmallVector<int, 16> PHINumbers;
LiftPhi(S, cast<PHINode>(CurrentV), PHINumbers);
Number = S.AllPtrNumbering[V] = S.AllPtrNumbering.at(CurrentV);
return Number;
} else if (isa<ExtractValueInst>(CurrentV) && !isUnion) {
assert(false && "TODO: Extract");
abort();
} else {
assert(
(CurrentV->getType()->isPointerTy() &&
getValueAddrSpace(CurrentV) == AddressSpace::Tracked) ||
isUnion);
Number = ++S.MaxPtrNumber;
S.ReversePtrNumbering[Number] = CurrentV;
}
S.PtrNumbering[CurrentV] = S.AllPtrNumbering[CurrentV] = S.AllPtrNumbering[V] = Number;
return Number;
}
int LateLowerGCFrame::Number(State &S, Value *V) {
assert(isSpecialPtr(V->getType()) || isUnionRep(V->getType()));
auto CurrentV = FindBaseValue(S, V);
if (CurrentV.second == -1)
return NumberBase(S, V, CurrentV.first);
auto Numbers = NumberVectorBase(S, CurrentV.first);
auto Number = Numbers.size() == 0 ? -1 : Numbers[CurrentV.second];
S.AllPtrNumbering[V] = Number;
return Number;
}
std::vector<int> LateLowerGCFrame::NumberVectorBase(State &S, Value *CurrentV) {
auto it = S.AllVectorNumbering.find(CurrentV);
if (it != S.AllVectorNumbering.end())
return it->second;
std::vector<int> Numbers{};
if (isa<Constant>(CurrentV) ||
((isa<Argument>(CurrentV) || isa<AllocaInst>(CurrentV) ||
isa<AddrSpaceCastInst>(CurrentV)) &&
getValueAddrSpace(CurrentV) != AddressSpace::Tracked)) {
Numbers.resize(cast<VectorType>(CurrentV->getType())->getNumElements(), -1);
}
/* We (the frontend) don't insert either of these, but it would be legal -
though a bit strange, considering they're pointers - for the optimizer to
do so. All that's needed here is to NumberVector the previous vector/value
and lift the operation */
else if (auto *SVI = dyn_cast<ShuffleVectorInst>(CurrentV)) {
std::vector<int> Numbers1 = NumberVectorBase(S, SVI->getOperand(0));
std::vector<int> Numbers2 = NumberVectorBase(S, SVI->getOperand(1));
auto Mask = SVI->getShuffleMask();
for (unsigned idx : Mask) {
if (idx < Numbers1.size()) {
Numbers.push_back(Numbers1[idx]);
} else {
Numbers.push_back(Numbers2[idx - Numbers1.size()]);
}
}
} else if (auto *IEI = dyn_cast<InsertElementInst>(CurrentV)) {
unsigned idx = cast<ConstantInt>(IEI->getOperand(2))->getZExtValue();
Numbers = NumberVectorBase(S, IEI->getOperand(0));
int ElNumber = Number(S, IEI->getOperand(1));
Numbers[idx] = ElNumber;
} else if (isa<SelectInst>(CurrentV) && getValueAddrSpace(CurrentV) != AddressSpace::Tracked) {
LiftSelect(S, cast<SelectInst>(CurrentV));
Numbers = S.AllVectorNumbering[CurrentV];
} else if (isa<PHINode>(CurrentV) && getValueAddrSpace(CurrentV) != AddressSpace::Tracked) {
SmallVector<int, 16> PHINumbers;
LiftPhi(S, cast<PHINode>(CurrentV), PHINumbers);
Numbers = S.AllVectorNumbering[CurrentV];
} else if (isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) || isa<PHINode>(CurrentV) ||
isa<SelectInst>(CurrentV)) {
// This is simple, we can just number them sequentially
for (unsigned i = 0; i < cast<VectorType>(CurrentV->getType())->getNumElements(); ++i) {
int Num = ++S.MaxPtrNumber;
Numbers.push_back(Num);
S.ReversePtrNumbering[Num] = CurrentV;
}
} else {
assert(false && "Unexpected vector generating operation");
}
S.AllVectorNumbering[CurrentV] = Numbers;
return Numbers;
}
std::vector<int> LateLowerGCFrame::NumberVector(State &S, Value *V) {
auto it = S.AllVectorNumbering.find(V);
if (it != S.AllVectorNumbering.end())
return it->second;
auto CurrentV = FindBaseValue(S, V);
assert(CurrentV.second == -1);
// E.g. if this is a gep, it's possible for the base to be a single ptr
if (isSpecialPtrVec(CurrentV.first->getType())) {
auto Numbers = NumberVectorBase(S, CurrentV.first);
S.AllVectorNumbering[V] = Numbers;
return Numbers;
} else {
std::vector<int> Numbers{};
Numbers.resize(cast<VectorType>(V->getType())->getNumElements(),
NumberBase(S, V, CurrentV.first));
return Numbers;
}
}
static void MaybeResize(BBState &BBS, unsigned Idx) {
if (BBS.Defs.size() <= Idx) {
BBS.Defs.resize(Idx + 1);
BBS.UpExposedUses.resize(Idx + 1);
BBS.UpExposedUsesUnrooted.resize(Idx + 1);
BBS.PhiOuts.resize(Idx + 1);
}
}
static bool HasBitSet(const BitVector &BV, unsigned Bit) {
return Bit < BV.size() && BV[Bit];
}
static void NoteDef(State &S, BBState &BBS, int Num, const std::vector<int> &SafepointsSoFar) {
assert(Num >= 0);
MaybeResize(BBS, Num);
assert(BBS.Defs[Num] == 0 && "SSA Violation or misnumbering?");
BBS.Defs[Num] = 1;
BBS.UpExposedUses[Num] = 0;
BBS.UpExposedUsesUnrooted[Num] = 0;
// This value could potentially be live at any following safe point
// if it ends up live out, so add it to the LiveIfLiveOut lists for all
// following safepoints.
for (int Safepoint : SafepointsSoFar) {
S.LiveIfLiveOut[Safepoint].push_back(Num);
}
}
void LateLowerGCFrame::MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr) {
int Num = -1;
Type *RT = Def->getType();
if (isSpecialPtr(RT)) {
assert(getValueAddrSpace(Def) == AddressSpace::Tracked &&
"Returned value of GC interest, but not tracked?");
Num = Number(S, Def);
}
else if (isUnionRep(RT)) {
// Probably a union return. Find the extractvalue
Num = Number(S, Def);
}
else if (isSpecialPtrVec(RT)) {
std::vector<int> Nums = NumberVector(S, Def);
for (int Num : Nums) {
NoteDef(S, BBS, Num, SafepointsSoFar);
if (!RefinedPtr.empty())
S.Refinements[Num] = RefinedPtr;
}
return;
}
else {
return;
}
NoteDef(S, BBS, Num, SafepointsSoFar);
if (!RefinedPtr.empty())
S.Refinements[Num] = std::move(RefinedPtr);
}
static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI) {
int Number = ++S.MaxSafepointNumber;
S.SafepointNumbering[CI] = Number;
S.ReverseSafepointNumbering.push_back(CI);
// Note which pointers are upward exposed live here. They need to be
// considered live at this safepoint even when they have a def earlier
// in this BB (i.e. even when they don't participate in the dataflow
// computation)
BBS.UpExposedUses |= BBS.UpExposedUsesUnrooted;
BBS.UpExposedUsesUnrooted.reset();
S.LiveSets.push_back(BBS.UpExposedUses);
S.LiveIfLiveOut.push_back(std::vector<int>{});
return Number;
}
void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses) {
// Short circuit to avoid having to deal with vectors of constants, etc.
if (isa<Constant>(V))
return;
else if (isSpecialPtrVec(V->getType())) {
std::vector<int> Nums = NumberVector(S, V);
for (int Num : Nums) {
MaybeResize(BBS, Num);
if (Num < 0)
continue;
Uses[Num] = 1;
}
}
else {
int Num = Number(S, V);
if (Num < 0)
return;
MaybeResize(BBS, Num);
Uses[Num] = 1;
}
}
void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, User &UI, BitVector &Uses) {
for (Use &U : UI.operands()) {
Value *V = U;
if (!isSpecialPtr(V->getType()) && !isSpecialPtrVec(V->getType()))
continue;
NoteUse(S, BBS, V, Uses);
}
}
template <typename VisitInst, typename callback>
void RecursivelyVisit(callback f, Value *V) {
for (Use &VU : V->uses()) {
User *TheUser = VU.getUser();
if (isa<VisitInst>(TheUser))
f(VU);
if (isa<CallInst>(TheUser) || isa<LoadInst>(TheUser) ||
isa<SelectInst>(TheUser) || isa<PHINode>(TheUser) ||
isa<StoreInst>(TheUser) || isa<PtrToIntInst>(TheUser))
continue;
if (isa<GetElementPtrInst>(TheUser) || isa<BitCastInst>(TheUser) || isa<AddrSpaceCastInst>(TheUser)) {
RecursivelyVisit<VisitInst, callback>(f, TheUser);
continue;
}
llvm_dump(V);
llvm_dump(TheUser);
assert(false && "Unexpected instruction");
}
}
static void dumpBitVectorValues(State &S, BitVector &BV) {
bool first = true;
for (int Idx = BV.find_first(); Idx >= 0; Idx = BV.find_next(Idx)) {
if (!first)
dbgs() << ", ";
first = false;
S.ReversePtrNumbering[Idx]->printAsOperand(dbgs());
}
}
/* Debugging utility to dump liveness information */
JL_USED_FUNC static void dumpLivenessState(Function &F, State &S) {
for (auto &BB : F) {
dbgs() << "Liveness analysis for BB " << BB.getName();
dbgs() << "\n\tDefs: ";
dumpBitVectorValues(S, S.BBStates[&BB].Defs);
dbgs() << "\n\tPhiOuts: ";
dumpBitVectorValues(S, S.BBStates[&BB].PhiOuts);
dbgs() << "\n\tUpExposedUsesUnrooted: ";
dumpBitVectorValues(S, S.BBStates[&BB].UpExposedUsesUnrooted);
dbgs() << "\n\tUpExposedUses: ";
dumpBitVectorValues(S, S.BBStates[&BB].UpExposedUses);
dbgs() << "\n\tLiveIn: ";
dumpBitVectorValues(S, S.BBStates[&BB].LiveIn);
dbgs() << "\n\tLiveOut: ";
dumpBitVectorValues(S, S.BBStates[&BB].LiveOut);
dbgs() << "\n";
}
}
// Check if this is a load from an immutable value. The easiest
// way to do so is to look at the tbaa and see if it derives from
// jtbaa_immut.
static bool isLoadFromImmut(LoadInst *LI)
{
if (LI->getMetadata(LLVMContext::MD_invariant_load))
return true;
MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa);
if (!TBAA)
return false;
while (TBAA->getNumOperands() > 1) {
TBAA = cast<MDNode>(TBAA->getOperand(1).get());
auto str = cast<MDString>(TBAA->getOperand(0))->getString();
if (str == "jtbaa_immut" || str == "jtbaa_const") {
return true;
}
}
return false;
}
// Check if this is a load from a constant global.
static bool isLoadFromConstGV(LoadInst *LI)
{
// We only emit single slot GV in codegen
// but LLVM global merging can change the pointer operands to GEPs/bitcasts
if (!isa<GlobalVariable>(LI->getPointerOperand()->stripInBoundsOffsets()))
return false;
MDNode *TBAA = LI->getMetadata(LLVMContext::MD_tbaa);
if (!TBAA)
return false;
while (TBAA->getNumOperands() > 1) {
TBAA = cast<MDNode>(TBAA->getOperand(1).get());
if (cast<MDString>(TBAA->getOperand(0))->getString() == "jtbaa_const") {
return true;
}
}
return false;
}
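// Heuristic: a pointer that is not in a special (GC) address space and traces back through GEPs
// to a function argument is treated as a reference into a stack frame.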
static bool LooksLikeFrameRef(Value *V) {
if (isSpecialPtr(V->getType()))
return false;
if (isa<GetElementPtrInst>(V))
return LooksLikeFrameRef(cast<GetElementPtrInst>(V)->getOperand(0));
return isa<Argument>(V);
}
SmallVector<int, 1> LateLowerGCFrame::GetPHIRefinements(PHINode *Phi, State &S)
{
// The returned vector can violate the domination property of the Refinements map.
// However, we can't know for sure if this is valid here, since incoming values
// that do not dominate the PHI node may be externally rooted (i.e. can be refined to -1).
// We only know that after scanning the whole function, so we'll record the possibly invalid
// edges here and fix them up at the end of `LocalScan`. (See `FixUpRefinements` below.)
auto nIncoming = Phi->getNumIncomingValues();
SmallVector<int, 1> RefinedPtr(nIncoming);
for (unsigned i = 0; i < nIncoming; ++i)
RefinedPtr[i] = Number(S, Phi->getIncomingValue(i));
return RefinedPtr;
}
JL_USED_FUNC static void DumpRefinements(State *S)
{
for (auto &kv: S->Refinements) {
int Num = kv.first;
if (Num < 0)
continue;
jl_safe_printf("Refinements for %d -- ", Num);
auto V = S->ReversePtrNumbering[Num];
llvm_dump(V);
for (auto refine: kv.second) {
if (refine < 0) {
jl_safe_printf(" %d\n", refine);
continue;
}
jl_safe_printf(" %d: ", refine);
auto R = S->ReversePtrNumbering[refine];
llvm_dump(R);
}
}
}
void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
{
// Now that we have all the possible refinement information, we can remove the invalid entries.
// * First find all values that must be externally rooted.
// Values may either be obviously externally rooted (e.g. arguments; this is indicated by a
// value of -1 or -2 in the refinement map), or may be externally rooted by refinement to other
// values. Thus a value is NOT externally rooted if it either:
// - has no refinements (all obviously externally rooted values are annotated by -1/-2 in the
// refinement map), or
// - recursively reaches a not-externally-rooted value through its refinements.
//
// We compute this set by first assuming all values are externally rooted and then iteratively
// removing the ones that are not.
BitVector extern_rooted(S.MaxPtrNumber + 1, true);
BitVector perm_rooted(S.MaxPtrNumber + 1, true);
// * First clear all values that are not derived from anything.
// This only needs to be done once.
for (int i = 0; i <= S.MaxPtrNumber; i++) {