//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation. This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible). Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because they
// often interact, especially for C++ programs. As such, iterating between
// SRoA and Mem2Reg until we run out of things to promote works well.
//
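// As a rough illustration (an added example, not part of the original
// comment), given a struct alloca whose fields are only accessed through
// constant GEPs:
//
//   %p = alloca { i32, i32 }
//   %f0 = getelementptr { i32, i32 }* %p, i32 0, i32 0
//   store i32 1, i32* %f0
//   %f1 = getelementptr { i32, i32 }* %p, i32 0, i32 1
//   store i32 2, i32* %f1
//
// SROA would split %p into two scalar allocas, and mem2reg would then promote
// each of them to an SSA value, removing the memory traffic entirely.
//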
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IRBuilder.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;

STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");

namespace {
struct SROA : public FunctionPass {
  SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT)
    : FunctionPass(ID), HasDomTree(hasDT) {
    if (T == -1)
      SRThreshold = 128;
    else
      SRThreshold = T;
    if (ST == -1)
      StructMemberThreshold = 32;
    else
      StructMemberThreshold = ST;
    if (AT == -1)
      ArrayElementThreshold = 8;
    else
      ArrayElementThreshold = AT;
    if (SLT == -1)
      // Do not limit the scalar integer load size if no threshold is given.
      ScalarLoadThreshold = -1;
    else
      ScalarLoadThreshold = SLT;
  }

  bool runOnFunction(Function &F);

  bool performScalarRepl(Function &F);
  bool performPromotion(Function &F);

private:
  bool HasDomTree;
  TargetData *TD;

  /// DeadInsts - Keep track of instructions we have made dead, so that
  /// we can remove them after we are done working.
  SmallVector<Value*, 32> DeadInsts;

  /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
  /// information about the uses.  All these fields are initialized to false
  /// and set to true when something is learned.
  struct AllocaInfo {
    /// The alloca to promote.
    AllocaInst *AI;

    /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
    /// looping and avoid redundant work.
    SmallPtrSet<PHINode*, 8> CheckedPHIs;

    /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
    bool isUnsafe : 1;

    /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
    bool isMemCpySrc : 1;

    /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
    bool isMemCpyDst : 1;

    /// hasSubelementAccess - This is true if a subelement of the alloca is
    /// ever accessed, or false if the alloca is only accessed with mem
    /// intrinsics or load/store that only access the entire alloca at once.
    bool hasSubelementAccess : 1;

    /// hasALoadOrStore - This is true if there are any loads or stores to it.
    /// The alloca may just be accessed with memcpy, for example, which would
    /// not set this.
    bool hasALoadOrStore : 1;

    explicit AllocaInfo(AllocaInst *ai)
      : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
        hasSubelementAccess(false), hasALoadOrStore(false) {}
  };

  /// SRThreshold - The maximum alloca size to be considered for SROA.
  unsigned SRThreshold;

  /// StructMemberThreshold - The maximum number of members a struct can
  /// contain to be considered for SROA.
  unsigned StructMemberThreshold;

  /// ArrayElementThreshold - The maximum number of elements an array can
  /// have to be considered for SROA.
  unsigned ArrayElementThreshold;

  /// ScalarLoadThreshold - The maximum size in bits of scalars to load when
  /// converting to scalar.
  unsigned ScalarLoadThreshold;

  void MarkUnsafe(AllocaInfo &I, Instruction *User) {
    I.isUnsafe = true;
    DEBUG(dbgs() << " Transformation preventing inst: " << *User << '\n');
  }

  bool isSafeAllocaToScalarRepl(AllocaInst *AI);

  void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
  void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
                                       AllocaInfo &Info);
  void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
  void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                       Type *MemOpType, bool isStore, AllocaInfo &Info,
                       Instruction *TheAccess, bool AllowWholeAccess);
  bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
  uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset, Type *&IdxTy);

  void DoScalarReplacement(AllocaInst *AI,
                           std::vector<AllocaInst*> &WorkList);
  void DeleteDeadInstructions();

  void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                            SmallVector<AllocaInst*, 32> &NewElts);
  void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                      SmallVector<AllocaInst*, 32> &NewElts);
  void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                  SmallVector<AllocaInst*, 32> &NewElts);
  void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
                                uint64_t Offset,
                                SmallVector<AllocaInst*, 32> &NewElts);
  void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                    AllocaInst *AI,
                                    SmallVector<AllocaInst*, 32> &NewElts);
  void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                     SmallVector<AllocaInst*, 32> &NewElts);
  void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                    SmallVector<AllocaInst*, 32> &NewElts);
  bool ShouldAttemptScalarRepl(AllocaInst *AI);

  static MemTransferInst *isOnlyCopiedFromConstantGlobal(
      AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
};

// SROA_DT - SROA that uses DominatorTree.
struct SROA_DT : public SROA {
  static char ID;
public:
  SROA_DT(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
    SROA(T, true, ID, ST, AT, SLT) {
    initializeSROA_DTPass(*PassRegistry::getPassRegistry());
  }

  // getAnalysisUsage - This pass does not require any passes, but we know it
  // will not alter the CFG, so say so.
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.addRequired<DominatorTree>();
    AU.setPreservesCFG();
  }
};

// SROA_SSAUp - SROA that uses SSAUpdater.
struct SROA_SSAUp : public SROA {
  static char ID;
public:
  SROA_SSAUp(int T = -1, int ST = -1, int AT = -1, int SLT = -1) :
    SROA(T, false, ID, ST, AT, SLT) {
    initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
  }

  // getAnalysisUsage - This pass does not require any passes, but we know it
  // will not alter the CFG, so say so.
  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG();
  }
};

}

char SROA_DT::ID = 0;
char SROA_SSAUp::ID = 0;

INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
                      "Scalar Replacement of Aggregates (DT)", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
                    "Scalar Replacement of Aggregates (DT)", false, false)

INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
                      "Scalar Replacement of Aggregates (SSAUp)", false, false)
INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
                    "Scalar Replacement of Aggregates (SSAUp)", false, false)

// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
                                                   bool UseDomTree,
                                                   int StructMemberThreshold,
                                                   int ArrayElementThreshold,
                                                   int ScalarLoadThreshold) {
  if (UseDomTree)
    return new SROA_DT(Threshold, StructMemberThreshold, ArrayElementThreshold,
                       ScalarLoadThreshold);
  return new SROA_SSAUp(Threshold, StructMemberThreshold,
                        ArrayElementThreshold, ScalarLoadThreshold);
}
//===----------------------------------------------------------------------===//
// Convert To Scalar Optimization.
//===----------------------------------------------------------------------===//

namespace {
/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
  /// AllocaSize - The size of the alloca being considered in bytes.
  unsigned AllocaSize;
  const TargetData &TD;
  unsigned ScalarLoadThreshold;

  /// IsNotTrivial - This is set to true if there is some access to the object
  /// which means that mem2reg can't promote it.
  bool IsNotTrivial;

  /// ScalarKind - Tracks the kind of alloca being considered for promotion,
  /// computed based on the uses of the alloca rather than the LLVM type
  /// system.
  enum {
    Unknown,

    // Accesses via GEPs that are consistent with element access of a vector
    // type. This will not be converted into a vector unless there is a later
    // access using an actual vector type.
    ImplicitVector,

    // Accesses via vector operations and GEPs that are consistent with the
    // layout of a vector type.
    Vector,

    // An integer bag-of-bits with bitwise operations for insertion and
    // extraction. Any combination of types can be converted into this kind
    // of scalar.
    Integer
  } ScalarKind;

  /// VectorTy - This tracks the type that we should promote the vector to if
  /// it is possible to turn it into a vector.  This starts out null, and if it
  /// isn't possible to turn into a vector type, it gets set to VoidTy.
  VectorType *VectorTy;

  /// HadNonMemTransferAccess - True if there is at least one access to the
  /// alloca that is not a MemTransferInst.  We don't want to turn structs into
  /// large integers unless there is some potential for optimization.
  bool HadNonMemTransferAccess;

  /// HadDynamicAccess - True if some element of this alloca was dynamic.
  /// We don't yet have support for turning a dynamic access into a large
  /// integer.
  bool HadDynamicAccess;

public:
  explicit ConvertToScalarInfo(unsigned Size, const TargetData &td,
                               unsigned SLT)
    : AllocaSize(Size), TD(td), ScalarLoadThreshold(SLT), IsNotTrivial(false),
      ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
      HadDynamicAccess(false) { }

  AllocaInst *TryConvert(AllocaInst *AI);

private:
  bool CanConvertToScalar(Value *V, uint64_t Offset, Value* NonConstantIdx);
  void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
  bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset,
                           Value *NonConstantIdx);

  Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
                                    uint64_t Offset, Value* NonConstantIdx,
                                    IRBuilder<> &Builder);
  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                   uint64_t Offset, Value* NonConstantIdx,
                                   IRBuilder<> &Builder);
};
} // end anonymous namespace.

/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
/// alloca if possible or null if not.
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
  // out.
  if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
    return 0;

  // If an alloca has only memset / memcpy uses, it may still have an Unknown
  // ScalarKind. Treat it as an Integer below.
  if (ScalarKind == Unknown)
    ScalarKind = Integer;

  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
    ScalarKind = Integer;

  // If we were able to find a vector type that can handle this with
  // insert/extract elements, and if there was at least one use that had
  // a vector type, promote this to a vector.  We don't want to promote
  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
  // we just get a lot of insert/extracts.  If at least one vector is
  // involved, then we probably really do have a union of vector/array.
  Type *NewTy;
  if (ScalarKind == Vector) {
    assert(VectorTy && "Missing type for vector scalar.");
    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
                 << *VectorTy << '\n');
    NewTy = VectorTy;  // Use the vector type.
  } else {
    unsigned BitWidth = AllocaSize * 8;

    // Do not convert to scalar integer if the alloca size exceeds the
    // scalar load threshold.
    if (BitWidth > ScalarLoadThreshold)
      return 0;

    if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
        !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
      return 0;
    // Dynamic accesses on integers aren't yet supported.  They need us to shift
    // by a dynamic amount which could be difficult to work out as we might not
    // know whether to use a left or right shift.
    if (ScalarKind == Integer && HadDynamicAccess)
      return 0;

    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
    // Create and insert the integer alloca.
    NewTy = IntegerType::get(AI->getContext(), BitWidth);
  }
  AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
  ConvertUsesToScalar(AI, NewAI, 0, 0);
  return NewAI;
}
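
// Concretely (an added note): a 16-byte alloca whose uses merged to
// VectorTy = <4 x float> takes the Vector path above, while the same
// allocation accessed only through scalar loads and stores would fall
// through to NewTy = i128, subject to the ScalarLoadThreshold and
// fitsInLegalInteger checks.
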
/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
/// (VectorTy) so far at the offset specified by Offset (which is specified in
/// bytes).
///
/// There are two cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory.  We mark this by setting VectorTy
///      to VoidTy.
void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
                                                    uint64_t Offset) {
  // If we already decided to turn this into a blob of integer memory, there is
  // nothing to be done.
  if (ScalarKind == Integer)
    return;

  // If this could be contributing to a vector, analyze it.

  // If the In type is a vector that is the same size as the alloca, see if it
  // matches the existing VecTy.
  if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
    if (MergeInVectorType(VInTy, Offset))
      return;
  } else if (In->isFloatTy() || In->isDoubleTy() ||
             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
    // Full width accesses can be ignored, because they can always be turned
    // into bitcasts.
    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
    if (EltSize == AllocaSize)
      return;

    // If we're accessing something that could be an element of a vector, see
    // if the implied vector agrees with what we already have and if Offset is
    // compatible with it.
    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
        (!VectorTy || EltSize == VectorTy->getElementType()
                                 ->getPrimitiveSizeInBits()/8)) {
      if (!VectorTy) {
        ScalarKind = ImplicitVector;
        VectorTy = VectorType::get(In, AllocaSize/EltSize);
      }
      return;
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form.  We can still turn this into a large integer.
  ScalarKind = Integer;
}
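
// For instance (an added illustration): on a 16-byte alloca, float accesses
// at byte offsets 0, 4, 8, and 12 all satisfy the divisibility checks above
// with EltSize == 4, so the first one sets ScalarKind = ImplicitVector and
// VectorTy = <4 x float>; a later full-width <4 x float> access would then
// upgrade ScalarKind to Vector via MergeInVectorType.
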
/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
                                            uint64_t Offset) {
  if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
    // If we're storing/loading a vector of the right size, allow it as a
    // vector.  If this is the first vector we see, remember the type so that
    // we know the element size.  If this is a subsequent access, ignore it
    // even if it is a differing type but the same size.  Worst case we can
    // bitcast the resultant vectors.
    if (!VectorTy)
      VectorTy = VInTy;
    ScalarKind = Vector;
    return true;
  }

  return false;
}
/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type.  If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
/// is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value as a vector type, set the
/// SawVec flag.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
                                             Value* NonConstantIdx) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Don't break volatile loads.
      if (!LI->isSimple())
        return false;
      // Don't touch MMX operations.
      if (LI->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(LI->getType(), Offset);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || !SI->isSimple()) return false;
      // Don't touch MMX operations.
      if (SI->getOperand(0)->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (!onlyUsedByLifetimeMarkers(BCI))
        IsNotTrivial = true;  // Can't be mem2reg'd.
      if (!CanConvertToScalar(BCI, Offset, NonConstantIdx))
        return false;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a GEP with variable indices, we can't handle it.
      PointerType* PtrTy = dyn_cast<PointerType>(GEP->getPointerOperandType());
      if (!PtrTy)
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      Value *GEPNonConstantIdx = 0;
      if (!GEP->hasAllConstantIndices()) {
        if (!isa<VectorType>(PtrTy->getElementType()))
          return false;
        if (NonConstantIdx)
          return false;
        GEPNonConstantIdx = Indices.pop_back_val();
        if (!GEPNonConstantIdx->getType()->isIntegerTy(32))
          return false;
        HadDynamicAccess = true;
      } else
        GEPNonConstantIdx = NonConstantIdx;
      uint64_t GEPOffset = TD.getIndexedOffset(PtrTy, Indices);
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, Offset+GEPOffset, GEPNonConstantIdx))
        return false;
      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      // Store to dynamic index.
      if (NonConstantIdx)
        return false;
      // Store of constant value.
      if (!isa<ConstantInt>(MSI->getValue()))
        return false;

      // Store of constant size.
      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
      if (!Len)
        return false;

      // If the size differs from the alloca, we can only convert the alloca to
      // an integer bag-of-bits.
      // FIXME: This should handle all of the cases that are currently accepted
      // as vector element insertions.
      if (Len->getZExtValue() != AllocaSize || Offset != 0)
        ScalarKind = Integer;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      // Store to dynamic index.
      if (NonConstantIdx)
        return false;
      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
      if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
        return false;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      continue;
    }

    // If this is a lifetime intrinsic, we can handle it.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        continue;
      }
    }

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}
/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
                                              uint64_t Offset,
                                              Value* NonConstantIdx) {
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset, NonConstantIdx);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      if (!GEP->hasAllConstantIndices())
        NonConstantIdx = Indices.pop_back_val();
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8, NonConstantIdx);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *LoadedVal = Builder.CreateLoad(NewAI);
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset,
                                     NonConstantIdx, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             NonConstantIdx, Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();

      // If the load we just inserted is now dead, then the inserted store
      // overwrote the entire thing.
      if (Old->use_empty())
        Old->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      assert(!NonConstantIdx && "Cannot replace dynamic memset with insert");
      int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
      if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
        unsigned NumBytes = static_cast<unsigned>(SNumBytes);
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
        Value *New = ConvertScalar_InsertValue(
                       ConstantInt::get(User->getContext(), APVal),
                       Old, Offset, 0, Builder);
        Builder.CreateStore(New, NewAI);

        // If the load we just inserted is now dead, then the memset overwrote
        // the entire thing.
        if (Old->use_empty())
          Old->eraseFromParent();
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");
      assert(!NonConstantIdx && "Cannot replace dynamic transfer with insert");

      // If the source and destination are both to the same alloca, then this is
      // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
      // as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));
      if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   SPTy->getAddressSpace());
        }
        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   DPTy->getAddressSpace());
        }
        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);

        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer. Src == Dst
      }

      MTI->eraseFromParent();
      continue;
    }

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        // There's no need to preserve these, as the resulting alloca will be
        // converted to a register anyways.
        II->eraseFromParent();
        continue;
      }
    }

    llvm_unreachable("Unsupported operation!");
  }
}
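
// To make the load rewrite above concrete (added illustration): with an i64
// NewAI on a little-endian target, a former "load i32" at bit offset 32
// becomes roughly
//
//   %in  = load i64* %NewAI
//   %sh  = lshr i64 %in, 32
//   %val = trunc i64 %sh to i32
//
// where the shift and truncate are produced by ConvertScalar_ExtractValue.
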
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset.  This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
                           uint64_t Offset, Value* NonConstantIdx,
                           IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  Type *FromType = FromVal->getType();
  if (FromType == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
    unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
    if (FromTypeSize == ToTypeSize)
      return Builder.CreateBitCast(FromVal, ToType);

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *Idx;
    if (NonConstantIdx) {
      if (Elt)
        Idx = Builder.CreateAdd(NonConstantIdx,
                                Builder.getInt32(Elt),
                                "dyn.offset");
      else
        Idx = NonConstantIdx;
    } else
      Idx = Builder.getInt32(Elt);
    Value *V = Builder.CreateExtractElement(FromVal, Idx);
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType);
    return V;
  }

  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (StructType *ST = dyn_cast<StructType>(ToType)) {
    assert(!NonConstantIdx &&
           "Dynamic indexing into struct types not supported");
    const StructLayout &Layout = *TD.getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                        Offset+Layout.getElementOffsetInBits(i),
                                              0, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i);
    }
    return Res;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    assert(!NonConstantIdx &&
           "Dynamic indexing into array types not supported");
    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, 0, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i);
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = TD.getTypeStoreSizeInBits(NTy) -
            TD.getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative bitwidths (with shl) which are not defined.
  // We do this to support (f.e.) loads off the end of a structure where
  // only some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(), ShAmt));
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(), -ShAmt));

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth));
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
      Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                   LIBitWidth));

  // If the result is an integer, this is a trunc or bitcast.
  if (ToType->isIntegerTy()) {
    // Should be done.
  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType);
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType);
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}
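
// Worked numbers for the big-endian shift above (added illustration): with
// NTy = i64, ToType = i32 and Offset = 0, getTypeStoreSizeInBits gives
// 64 and 32, so ShAmt = 64 - 32 - 0 = 32; on a big-endian target the first
// four bytes hold the high half of the i64, and the lshr by 32 moves them
// into the low bits before the final trunc.
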
/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
///
/// NonConstantIdx is an index value if there was a GEP with a non-constant
/// index value.  If this is 0 then all GEPs used to find this insert address
/// are constant.
Value *ConvertToScalarInfo::
ConvertScalar_InsertValue(Value *SV, Value *Old,
                          uint64_t Offset, Value* NonConstantIdx,
                          IRBuilder<> &Builder) {
  // Convert the stored type to the actual type, shift it left to insert
  // then 'or' into place.
  Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();

  if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type?
    if (ValSize == VecSize)
      return Builder.CreateBitCast(SV, AllocaType);

    // Must be an element insertion.
    Type *EltTy = VTy->getElementType();
    if (SV->getType() != EltTy)
      SV = Builder.CreateBitCast(SV, EltTy);
    uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
    unsigned Elt = Offset/EltSize;
    Value *Idx;
    if (NonConstantIdx) {
      if (Elt)
        Idx = Builder.CreateAdd(NonConstantIdx,
                                Builder.getInt32(Elt),
                                "dyn.offset");
      else
        Idx = NonConstantIdx;
    } else
      Idx = Builder.getInt32(Elt);
    return Builder.CreateInsertElement(Old, SV, Idx);
  }

  // If SV is a first-class aggregate value, insert each value recursively.
  if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
    assert(!NonConstantIdx &&
           "Dynamic indexing into struct types not supported");
    const StructLayout &Layout = *TD.getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i);
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      0, Builder);
    }
    return Old;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    assert(!NonConstantIdx &&
           "Dynamic indexing into array types not supported");
    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i);
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
    }
    return Old;
  }

  // If SV is a float, convert it to the appropriate integer type.
  // If it is a pointer, do the same.
  unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
  unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
    SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
  else if (SV->getType()->isPointerTy())
    SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
        AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType);
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType);
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }

  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative bitwidths (with shr) which are not defined.
  // We do this to support (f.e.) stores off the end of a structure where
  // only some bits in the structure are set.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
    Mask <<= ShAmt;
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {