forked from llvm-mirror/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ARMLoadStoreOptimizer.cpp
2324 lines (2111 loc) · 81.2 KB
/
ARMLoadStoreOptimizer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "arm-ldst-opt"
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
namespace llvm {
void initializeARMLoadStoreOptPass(PassRegistry &);
}
#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
namespace {

/// Post-register-allocation pass that combines adjacent load / store
/// instructions to form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  static char ID;

  ARMLoadStoreOpt() : MachineFunctionPass(ID) {
    initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
  }

  const MachineFunction *MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  const TargetLowering *TL;
  ARMFunctionInfo *AFI;

  // Cached liveness state used when searching for a scratch register.
  // Only valid while LiveRegsValid is set; queries must walk backwards
  // within one basic block (see moveLiveRegsBefore).
  LivePhysRegs LiveRegs;
  RegisterClassInfo RegClassInfo;
  MachineBasicBlock::const_iterator LiveRegPos;
  bool LiveRegsValid;
  bool RegClassInfoValid;
  // Set once per function from the subtarget.
  bool isThumb1, isThumb2;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  const char *getPassName() const override {
    return ARM_LOAD_STORE_OPT_NAME;
  }

private:
  /// A set of load/store MachineInstrs with same base register sorted by
  /// offset.
  struct MemOpQueueEntry {
    MachineInstr *MI;
    int Offset;        ///< Load/Store offset.
    unsigned Position; ///< Position as counted from end of basic block.
    MemOpQueueEntry(MachineInstr *MI, int Offset, unsigned Position)
        : MI(MI), Offset(Offset), Position(Position) {}
  };
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

  /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  /// merged into a LDM/STM.
  struct MergeCandidate {
    /// List of instructions ordered by load/store offset.
    SmallVector<MachineInstr*, 4> Instrs;
    /// Index in Instrs of the instruction being latest in the schedule.
    unsigned LatestMIIdx;
    /// Index in Instrs of the instruction being earliest in the schedule.
    unsigned EarliestMIIdx;
    /// Index into the basic block where the merged instruction will be
    /// inserted. (See MemOpQueueEntry.Position)
    unsigned InsertPos;
    /// Whether the instructions can be merged into a ldm/stm instruction.
    bool CanMergeToLSMulti;
    /// Whether the instructions can be merged into a ldrd/strd instruction.
    bool CanMergeToLSDouble;
  };
  SpecificBumpPtrAllocator<MergeCandidate> Allocator;
  SmallVector<const MergeCandidate*,4> Candidates;
  SmallVector<MachineInstr*,4> MergeBaseCandidates;

  void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                          MachineBasicBlock::const_iterator Before);
  unsigned findFreeReg(const TargetRegisterClass &RegClass);
  void UpdateBaseRegUses(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         DebugLoc DL, unsigned Base, unsigned WordOffset,
                         ARMCC::CondCodes Pred, unsigned PredReg);
  MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
      MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
      bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
      DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
  MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
      MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
      bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
      DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
  void FormCandidates(const MemOpQueue &MemOps);
  MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;

} // end anonymous namespace
INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
static bool definesCPSR(const MachineInstr *MI) {
for (const auto &MO : MI->operands()) {
if (!MO.isReg())
continue;
if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
// If the instruction has live CPSR def, then it's not safe to fold it
// into load / store.
return true;
}
return false;
}
/// Decode the signed byte offset encoded in a single load/store instruction,
/// handling the different immediate encodings of each opcode family.
static int getMemoryOpOffset(const MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  // LDRD/STRD use addressing mode 3; the VLDR/VSTR cases that fall through
  // to the bottom of this function use addressing mode 5.
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI->getDesc().getNumOperands();
  // The immediate offset is the third-from-last static operand (it is
  // followed by the two predicate operands).
  unsigned OffField = MI->getOperand(NumOperands-3).getImm();

  // These opcodes carry the byte offset directly in the immediate field.
  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  // AM3 encodes a byte magnitude, AM5 a word magnitude; both carry a
  // separate add/sub flag that gives the offset its sign.
  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;

  return Offset;
}
/// Return the base-register operand of a single load/store instruction.
/// For all single load/store forms this pass handles, operand 1 is the base.
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  const MachineOperand &BaseOp = MI.getOperand(1);
  return BaseOp;
}
/// Return the transferred-register operand (destination of a load, source of
/// a store) of a single load/store instruction: operand 0.
static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  const MachineOperand &RegOp = MI.getOperand(0);
  return RegOp;
}
/// Map a single load/store opcode plus an addressing submode to the
/// corresponding load/store-multiple opcode, bumping the matching statistic.
/// Returns 0 for the VLDM/VSTM db cases, where only a writeback (_UPD)
/// variant of the instruction exists.
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
/// Return the addressing submode (increment/decrement, after/before) implied
/// by a load/store-multiple opcode.
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}
/// True for the Thumb1 32-bit word-load opcodes handled by this pass.
static bool isT1i32Load(unsigned Opc) {
  switch (Opc) {
  case ARM::tLDRi:
  case ARM::tLDRspi:
    return true;
  default:
    return false;
  }
}
/// True for the Thumb2 32-bit word-load opcodes handled by this pass.
static bool isT2i32Load(unsigned Opc) {
  switch (Opc) {
  case ARM::t2LDRi12:
  case ARM::t2LDRi8:
    return true;
  default:
    return false;
  }
}
/// True for any 32-bit integer word load (ARM, Thumb1 or Thumb2).
static bool isi32Load(unsigned Opc) {
  if (Opc == ARM::LDRi12)
    return true;
  return isT1i32Load(Opc) || isT2i32Load(Opc);
}
/// True for the Thumb1 32-bit word-store opcodes handled by this pass.
static bool isT1i32Store(unsigned Opc) {
  switch (Opc) {
  case ARM::tSTRi:
  case ARM::tSTRspi:
    return true;
  default:
    return false;
  }
}
/// True for the Thumb2 32-bit word-store opcodes handled by this pass.
static bool isT2i32Store(unsigned Opc) {
  switch (Opc) {
  case ARM::t2STRi12:
  case ARM::t2STRi8:
    return true;
  default:
    return false;
  }
}
/// True for any 32-bit integer word store (ARM, Thumb1 or Thumb2).
static bool isi32Store(unsigned Opc) {
  if (Opc == ARM::STRi12)
    return true;
  return isT1i32Store(Opc) || isT2i32Store(Opc);
}
/// True for any single (non-multiple) load this pass knows how to merge,
/// including the VFP single/double loads.
static bool isLoadSingle(unsigned Opc) {
  if (Opc == ARM::VLDRS || Opc == ARM::VLDRD)
    return true;
  return isi32Load(Opc);
}
/// Return the factor that converts a word (4-byte) count into the immediate
/// units of \p Opc: Thumb1 word accesses encode their immediate in words
/// (scale 1), halfword accesses in halfwords (scale 2) and byte accesses in
/// bytes (scale 4).
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
/// Return the number of bytes transferred by \p MI, or 0 for opcodes this
/// pass does not recognize.
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    // The register list is the variadic tail of the operand list; the
    // register count is derived from the difference between the actual and
    // the static operand counts, and each register moves 4 bytes.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    // Same register-count computation; D registers move 8 bytes each.
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
/// \p WordOffset is the writeback amount measured in words (4-byte units).
void
ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   DebugLoc DL, unsigned Base,
                                   unsigned WordOffset,
                                   ARMCC::CondCodes Pred, unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
        Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
        Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;

      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
          MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4 ;
        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }

      } else {
        // Can't update the instruction.
        InsertSub = true;
      }

    } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
      .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}
/// Return the first register in the allocation order of \p RegClass that is
/// not currently live (per the cached LiveRegs state), or 0 if none is free.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  // Lazily compute the allocation order the first time it is needed.
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Candidate : RegClassInfo.getOrder(&RegClass)) {
    if (LiveRegs.contains(Candidate))
      continue;
    return Candidate;
  }
  return 0;
}
/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
    MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    // Seed the liveness with the block's live-outs and scan from the end.
    LiveRegs.init(TRI);
    LiveRegs.addLiveOuts(&MBB, true);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  // Note: this only terminates if Before is at or above LiveRegPos, which is
  // why queries within one block must come in reverse order.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}
/// Return true if \p Reg appears in the (register, kill-flag) list \p Regs.
static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (unsigned I = 0, E = Regs.size(); I != E; ++I)
    if (Regs[I].first == Reg)
      return true;
  return false;
}
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if the merge is not possible.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
    (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
     MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;

  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    // Pick the add/sub opcode that can materialize Base + Offset into
    // NewBase, depending on target and immediate range.
    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = - Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only inThumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);

  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}
/// Create and insert a t2LDRD or t2STRD for the two registers in \p Regs,
/// addressing Base + Offset. Returns the newly created instruction.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
    MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
    bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
    DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
  // Only integer word loads/stores can become LDRD/STRD.
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
  assert(Regs.size() == 2);

  MachineInstrBuilder MIB =
      BuildMI(MBB, InsertBefore, DL, TII->get(LoadStoreOpcode));
  // A load defines both destination registers; a store reads both sources
  // and propagates their kill flags.
  if (IsLoad)
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  else
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}
/// Try to merge the instructions of \p Cand into a single LDRD/STRD or
/// LDM/STM, erase the originals, and patch up kill flags and implicit
/// super-register defs in the affected range. Returns the merged instruction,
/// or nullptr if no merge was performed.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers, after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // removed these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge. The new instruction is inserted just after the latest
  // of the originals, so all inputs are available there.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  // Prefer LDRD/STRD; fall back to LDM/STM if that fails.
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;

  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidated it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef; Replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}
/// Return true if Offset is encodable in the immediate field of a Thumb2
/// LDRD/STRD (t2LDRDi8/t2STRDi8) instruction.
static bool isValidLSDoubleOffset(int Offset) {
  // These instructions carry an 8-bit immediate which the hardware scales
  // by four, so only 4-byte-aligned offsets with magnitude below 1024
  // (i.e. at most 255 * 4) are representable.
  const unsigned Magnitude = abs(Offset);
  if (Magnitude & 3u)
    return false;
  return Magnitude < 1024;
}
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
const MachineInstr *FirstMI = MemOps[0].MI;
unsigned Opcode = FirstMI->getOpcode();
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
unsigned Size = getLSMultipleTransferSize(FirstMI);
unsigned SIndex = 0;
unsigned EIndex = MemOps.size();
do {
// Look at the first instruction.
const MachineInstr *MI = MemOps[SIndex].MI;
int Offset = MemOps[SIndex].Offset;
const MachineOperand &PMO = getLoadStoreRegOp(*MI);
unsigned PReg = PMO.getReg();
unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
unsigned Latest = SIndex;
unsigned Earliest = SIndex;
unsigned Count = 1;
bool CanMergeToLSDouble =
STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
// ARM errata 602117: LDRD with base in list may result in incorrect base
// register when interrupted or faulted.
if (STI->isCortexM3() && isi32Load(Opcode) &&
PReg == getLoadStoreBaseOp(*MI).getReg())
CanMergeToLSDouble = false;
bool CanMergeToLSMulti = true;
    // On Swift, avoid forming a vldm/vstm that starts with an odd register
    // number, as that needs more uops than single vldrs.
if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
CanMergeToLSMulti = false;
// LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
// deprecated; LDM to PC is fine but cannot happen here.
if (PReg == ARM::SP || PReg == ARM::PC)
CanMergeToLSMulti = CanMergeToLSDouble = false;
// Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
if (NewOffset != Offset + (int)Size)
break;
const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
unsigned Reg = MO.getReg();
if (Reg == ARM::SP || Reg == ARM::PC)
break;
// See if the current load/store may be part of a multi load/store.
unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
bool PartOfLSMulti = CanMergeToLSMulti;
if (PartOfLSMulti) {
// Register numbers must be in ascending order.
if (RegNum <= PRegNum)
PartOfLSMulti = false;
// For VFP / NEON load/store multiples, the registers must be
// consecutive and within the limit on the number of registers per
// instruction.
else if (!isNotVFP && RegNum != PRegNum+1)
PartOfLSMulti = false;
}
// See if the current load/store may be part of a double load/store.
bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
if (!PartOfLSMulti && !PartOfLSDouble)
break;
CanMergeToLSMulti &= PartOfLSMulti;
CanMergeToLSDouble &= PartOfLSDouble;
// Track MemOp with latest and earliest position (Positions are
// counted in reverse).
unsigned Position = MemOps[I].Position;
if (Position < MemOps[Latest].Position)
Latest = I;
else if (Position > MemOps[Earliest].Position)
Earliest = I;
// Prepare for next MemOp.
Offset += Size;
PRegNum = RegNum;
}
// Form a candidate from the Ops collected so far.
MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)