Skip to content

Commit 0b89712

Browse files
committed
Assume lane masks are always precise
Allowing imprecise lane masks in case of more than 32 sub-register lanes leads to some tricky corner cases, and I would need yet another bugfix for another one of them. Instead, I would rather declare lane masks as precise and let tablegen abort if we do not have enough bits. This does not affect any in-tree target; even AMDGPU only needs 16 lanes at the moment. If the 32 lanes turn out to be a problem in the future, we can easily change the LaneBitmask typedef to uint64_t. Differential Revision: http://reviews.llvm.org/D14557 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253279 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0f6bba9 commit 0b89712

File tree

4 files changed

+26
-80
lines changed

4 files changed

+26
-80
lines changed

include/llvm/Target/TargetRegisterInfo.h

+3-19
Original file line numberDiff line numberDiff line change
@@ -35,24 +35,21 @@ class VirtRegMap;
3535
class raw_ostream;
3636
class LiveRegMatrix;
3737

38-
/// A bitmask representing the parts of a register are alive.
38+
/// A bitmask representing the covering of a register with sub-registers.
3939
///
40+
/// This is typically used to track liveness at sub-register granularity.
4041
/// Lane masks for sub-register indices are similar to register units for
4142
/// physical registers. The individual bits in a lane mask can't be assigned
4243
/// any specific meaning. They can be used to check if two sub-register
4344
/// indices overlap.
4445
///
45-
/// If the target has a register such that:
46+
/// Iff the target has a register such that:
4647
///
4748
/// getSubReg(Reg, A) overlaps getSubReg(Reg, B)
4849
///
4950
/// then:
5051
///
5152
/// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0
52-
///
53-
/// The converse is not necessarily true. If two lane masks have a common
54-
/// bit, the corresponding sub-registers may not overlap, but it can be
55-
/// assumed that they usually will.
5653
typedef unsigned LaneBitmask;
5754

5855
class TargetRegisterClass {
@@ -369,19 +366,6 @@ class TargetRegisterInfo : public MCRegisterInfo {
369366
return SubRegIndexLaneMasks[SubIdx];
370367
}
371368

372-
/// Returns true if the given lane mask is imprecise.
373-
///
374-
/// LaneMasks as given by getSubRegIndexLaneMask() have a limited number of
375-
/// bits, so for targets with more than 31 disjunct subregister indices there
376-
/// may be cases where:
377-
/// getSubReg(Reg,A) does not overlap getSubReg(Reg,B)
378-
/// but we still have
379-
/// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0.
380-
/// This function returns true in those cases.
381-
static bool isImpreciseLaneMask(LaneBitmask LaneMask) {
382-
return LaneMask & 0x80000000u;
383-
}
384-
385369
/// The lane masks returned by getSubRegIndexLaneMask() above can only be
386370
/// used to determine if sub-registers overlap - they can't be used to
387371
/// determine if a set of sub-registers completely cover another

lib/CodeGen/RegisterCoalescer.cpp

+17-40
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,12 @@ namespace {
163163
/// LaneMask are split as necessary. @p LaneMask are the lanes that
164164
/// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
165165
/// lanemasks already adjusted to the coalesced register.
166-
/// @returns false if live range conflicts couldn't get resolved.
167-
bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
166+
void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
168167
LaneBitmask LaneMask, CoalescerPair &CP);
169168

170169
/// Join the liveranges of two subregisters. Joins @p RRange into
171170
/// @p LRange, @p RRange may be invalid afterwards.
172-
/// @returns false if live range conflicts couldn't get resolved.
173-
bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
171+
void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
174172
LaneBitmask LaneMask, const CoalescerPair &CP);
175173

176174
/// We found a non-trivially-coalescable copy. If the source value number is
@@ -2469,7 +2467,7 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
24692467
}
24702468
}
24712469

2472-
bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
2470+
void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
24732471
LaneBitmask LaneMask,
24742472
const CoalescerPair &CP) {
24752473
SmallVector<VNInfo*, 16> NewVNInfo;
@@ -2484,13 +2482,15 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
24842482
// ranges get mapped to the "overflow" lane mask bit which creates unexpected
24852483
// interferences.
24862484
if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
2487-
DEBUG(dbgs() << "*** Couldn't join subrange!\n");
2488-
return false;
2485+
// We already determined that it is legal to merge the intervals, so this
2486+
// should never fail.
2487+
llvm_unreachable("*** Couldn't join subrange!\n");
24892488
}
24902489
if (!LHSVals.resolveConflicts(RHSVals) ||
24912490
!RHSVals.resolveConflicts(LHSVals)) {
2492-
DEBUG(dbgs() << "*** Couldn't join subrange!\n");
2493-
return false;
2491+
// We already determined that it is legal to merge the intervals, so this
2492+
// should never fail.
2493+
llvm_unreachable("*** Couldn't join subrange!\n");
24942494
}
24952495

24962496
// The merging algorithm in LiveInterval::join() can't handle conflicting
@@ -2513,17 +2513,16 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
25132513

25142514
DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
25152515
if (EndPoints.empty())
2516-
return true;
2516+
return;
25172517

25182518
// Recompute the parts of the live range we had to remove because of
25192519
// CR_Replace conflicts.
25202520
DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
25212521
<< " points: " << LRange << '\n');
25222522
LIS->extendToIndices(LRange, EndPoints);
2523-
return true;
25242523
}
25252524

2526-
bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
2525+
void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
25272526
const LiveRange &ToMerge,
25282527
LaneBitmask LaneMask,
25292528
CoalescerPair &CP) {
@@ -2553,16 +2552,14 @@ bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
25532552
CommonRange = &R;
25542553
}
25552554
LiveRange RangeCopy(ToMerge, Allocator);
2556-
if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP))
2557-
return false;
2555+
joinSubRegRanges(*CommonRange, RangeCopy, Common, CP);
25582556
LaneMask &= ~RMask;
25592557
}
25602558

25612559
if (LaneMask != 0) {
25622560
DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
25632561
LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
25642562
}
2565-
return true;
25662563
}
25672564

25682565
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
@@ -2613,41 +2610,21 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
26132610

26142611
// Determine lanemasks of RHS in the coalesced register and merge subranges.
26152612
unsigned SrcIdx = CP.getSrcIdx();
2616-
bool Abort = false;
26172613
if (!RHS.hasSubRanges()) {
26182614
LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
26192615
: TRI->getSubRegIndexLaneMask(SrcIdx);
2620-
if (!mergeSubRangeInto(LHS, RHS, Mask, CP))
2621-
Abort = true;
2616+
mergeSubRangeInto(LHS, RHS, Mask, CP);
26222617
} else {
26232618
// Pair up subranges and merge.
26242619
for (LiveInterval::SubRange &R : RHS.subranges()) {
26252620
LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
2626-
if (!mergeSubRangeInto(LHS, R, Mask, CP)) {
2627-
Abort = true;
2628-
break;
2629-
}
2621+
mergeSubRangeInto(LHS, R, Mask, CP);
26302622
}
26312623
}
2632-
if (Abort) {
2633-
// This shouldn't have happened :-(
2634-
// However we are aware of at least one existing problem where we
2635-
// can't merge subranges when multiple ranges end up in the
2636-
// "overflow bit" 32. As a workaround we drop all subregister ranges
2637-
// which means we loose some precision but are back to a well defined
2638-
// state.
2639-
assert(TargetRegisterInfo::isImpreciseLaneMask(
2640-
CP.getNewRC()->getLaneMask())
2641-
&& "SubRange merge should only fail when merging into bit 32.");
2642-
DEBUG(dbgs() << "\tSubrange join aborted!\n");
2643-
LHS.clearSubRanges();
2644-
RHS.clearSubRanges();
2645-
} else {
2646-
DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
2624+
DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
26472625

2648-
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
2649-
RHSVals.pruneSubRegValues(LHS, ShrinkMask);
2650-
}
2626+
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
2627+
RHSVals.pruneSubRegValues(LHS, ShrinkMask);
26512628
}
26522629

26532630
// The merging algorithm in LiveInterval::join() can't handle conflicting

lib/CodeGen/VirtRegMap.cpp

-8
Original file line numberDiff line numberDiff line change
@@ -400,14 +400,6 @@ void VirtRegRewriter::rewrite() {
400400
MO.setIsUndef(true);
401401
} else if (!MO.isDead()) {
402402
assert(MO.isDef());
403-
// Things get tricky when we ran out of lane mask bits and
404-
// merged multiple lanes into the overflow bit: In this case
405-
// our subregister liveness tracking isn't precise and we can't
406-
// know what subregister parts are undefined, fall back to the
407-
// implicit super-register def then.
408-
LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
409-
if (TargetRegisterInfo::isImpreciseLaneMask(LaneMask))
410-
SuperDefs.push_back(PhysReg);
411403
}
412404
}
413405

utils/TableGen/CodeGenRegisters.cpp

+6-13
Original file line numberDiff line numberDiff line change
@@ -1171,20 +1171,13 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
11711171
CoveringLanes = ~0u;
11721172
for (auto &Idx : SubRegIndices) {
11731173
if (Idx.getComposites().empty()) {
1174+
if (Bit > 32) {
1175+
PrintFatalError(
1176+
Twine("Ran out of lanemask bits to represent subregister ")
1177+
+ Idx.getName());
1178+
}
11741179
Idx.LaneMask = 1u << Bit;
1175-
// Share bit 31 in the unlikely case there are more than 32 leafs.
1176-
//
1177-
// Sharing bits is harmless; it allows graceful degradation in targets
1178-
// with more than 32 vector lanes. They simply get a limited resolution
1179-
// view of lanes beyond the 32nd.
1180-
//
1181-
// See also the comment for getSubRegIndexLaneMask().
1182-
if (Bit < 31)
1183-
++Bit;
1184-
else
1185-
// Once bit 31 is shared among multiple leafs, the 'lane' it represents
1186-
// is no longer covering its registers.
1187-
CoveringLanes &= ~(1u << Bit);
1180+
++Bit;
11881181
} else {
11891182
Idx.LaneMask = 0;
11901183
}

0 commit comments

Comments
 (0)