Skip to content

Commit

Permalink
[CodeGen] Fix incorrect usage of MCPhysReg for diff list elements
Browse files Browse the repository at this point in the history
The lists contain differences between register numbers, not the register
numbers themselves. Since a difference can also be negative, this also
changes its type to signed.

Changing the type to signed exposed a "bug". For AMDGPU, which has many
registers, the first element of a sequence could be as big as ~45k.
The value does not fit into int16_t, but fits into uint16_t. The bug
didn't show up because of unsigned wrapping and truncation of the Val
field in the advance() method.

To fix the issue, I changed the way regunit difflists are encoded. The
4-bit 'scale' field of MCRegisterDesc::RegUnit was replaced by 12-bit
number of the first regunit, and the first element of each of the lists
was removed. The higher 20 bits of RegUnit field contain the initial
offset into DiffLists array.
AMDGPU has 1'409 regunits (2^12 = 4'096), and the biggest offset is
80'041 (2^20 = 1'048'576). That is, there is enough room.

Changing the encoding method also resulted in a smaller array size, the
numbers are below (I omitted targets with less than 100 elements).

```
AMDGPU   | 80052 | 78741 |  -1,6%
RISCV    |  6498 |  6297 |  -3,1%
ARM      |  4181 |  3966 |  -5,1%
AArch64  |  2770 |  2592 |  -6,4%
PPC      |  1578 |  1441 |  -8,7%
Hexagon  |   994 |   740 | -25,6%
R600     |   508 |   398 | -21,7%
VE       |   471 |   459 |  -2,5%
Sparc    |   381 |   363 |  -4,7%
X86      |   326 |   208 | -36,2%
Mips     |   253 |   200 | -20,9%
SystemZ  |   186 |   162 | -12,9%
```

Reviewed By: foad, arsenm

Differential Revision: https://reviews.llvm.org/D151036
  • Loading branch information
s-barannikov committed Jun 4, 2023
1 parent 9424a54 commit 7a25870
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 95 deletions.
91 changes: 39 additions & 52 deletions llvm/include/llvm/MC/MCRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,8 @@ struct MCRegisterDesc {
// sub-register in SubRegs.
uint32_t SubRegIndices;

// RegUnits - Points to the list of register units. The low 4 bits holds the
// Scale, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
// Points to the list of register units. The low bits hold the first regunit
// number, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
uint32_t RegUnits;

/// Index into list with lane mask sequences. The sequence contains a lanemask
Expand Down Expand Up @@ -161,7 +161,7 @@ class MCRegisterInfo {
unsigned NumClasses; // Number of entries in the array
unsigned NumRegUnits; // Number of regunits.
const MCPhysReg (*RegUnitRoots)[2]; // Pointer to regunit root table.
const MCPhysReg *DiffLists; // Pointer to the difflists array
const int16_t *DiffLists; // Pointer to the difflists array
const LaneBitmask *RegUnitMaskSequences; // Pointer to lane mask sequences
// for register units.
const char *RegStrings; // Pointer to the string table.
Expand Down Expand Up @@ -194,31 +194,19 @@ class MCRegisterInfo {
/// Don't use this class directly, use one of the specialized sub-classes
/// defined below.
class DiffListIterator {
uint16_t Val = 0;
const MCPhysReg *List = nullptr;
unsigned Val = 0;
const int16_t *List = nullptr;

protected:
/// Create an invalid iterator. Call init() to point to something useful.
DiffListIterator() = default;

/// init - Point the iterator to InitVal, decoding subsequent values from
/// DiffList. The iterator will initially point to InitVal, sub-classes are
/// responsible for skipping the seed value if it is not part of the list.
void init(MCPhysReg InitVal, const MCPhysReg *DiffList) {
/// Point the iterator to InitVal, decoding subsequent values from DiffList.
void init(unsigned InitVal, const int16_t *DiffList) {
Val = InitVal;
List = DiffList;
}

/// advance - Move to the next list position, return the applied
/// differential. This function does not detect the end of the list, that
/// is the caller's responsibility (by checking for a 0 return value).
MCRegister advance() {
assert(isValid() && "Cannot move off the end of the list.");
MCPhysReg D = *List++;
Val += D;
return D;
}

public:
/// isValid - returns true if this iterator is not yet at the end.
bool isValid() const { return List; }
Expand All @@ -228,8 +216,11 @@ class MCRegisterInfo {

/// Pre-increment to move to the next position.
void operator++() {
assert(isValid() && "Cannot move off the end of the list.");
int16_t D = *List++;
Val += D;
// The end of the list is encoded as a 0 differential.
if (!advance())
if (!D)
List = nullptr;
}

Expand All @@ -248,9 +239,9 @@ class MCRegisterInfo {
protected:
mc_difflist_iterator(MCRegisterInfo::DiffListIterator Iter) : Iter(Iter) {}

// Allow conversion between instantiations where valid.
mc_difflist_iterator(MCRegister Reg, const MCPhysReg *DiffList) {
Iter.init(Reg, DiffList);
/// Point the iterator to InitVal, decoding subsequent values from DiffList.
void init(unsigned InitVal, const int16_t *DiffList) {
Iter.init(InitVal, DiffList);
Val = *Iter;
}

Expand Down Expand Up @@ -287,8 +278,11 @@ class MCRegisterInfo {
mc_subreg_iterator(MCRegisterInfo::DiffListIterator Iter)
: mc_difflist_iterator(Iter) {}
mc_subreg_iterator() = default;
mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
: mc_difflist_iterator(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs) {}

mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
assert(MCRegister::isPhysicalRegister(Reg.id()));
init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SubRegs);
}
};

/// Forward iterator over all super-registers.
Expand All @@ -299,9 +293,11 @@ class MCRegisterInfo {
mc_superreg_iterator(MCRegisterInfo::DiffListIterator Iter)
: mc_difflist_iterator(Iter) {}
mc_superreg_iterator() = default;
mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
: mc_difflist_iterator(Reg,
MCRI->DiffLists + MCRI->get(Reg).SuperRegs) {}

mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
assert(MCRegister::isPhysicalRegister(Reg.id()));
init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
}
};

/// Return an iterator range over all sub-registers of \p Reg, excluding \p
Expand Down Expand Up @@ -351,16 +347,11 @@ class MCRegisterInfo {
/// Initialize MCRegisterInfo, called by TableGen
/// auto-generated routines. *DO NOT USE*.
void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA,
unsigned PC,
const MCRegisterClass *C, unsigned NC,
const MCPhysReg (*RURoots)[2],
unsigned NRU,
const MCPhysReg *DL,
const LaneBitmask *RUMS,
const char *Strings,
const char *ClassStrings,
const uint16_t *SubIndices,
unsigned NumIndices,
unsigned PC, const MCRegisterClass *C, unsigned NC,
const MCPhysReg (*RURoots)[2], unsigned NRU,
const int16_t *DL, const LaneBitmask *RUMS,
const char *Strings, const char *ClassStrings,
const uint16_t *SubIndices, unsigned NumIndices,
const SubRegCoveredBits *SubIdxRanges,
const uint16_t *RET) {
Desc = D;
Expand Down Expand Up @@ -598,7 +589,8 @@ class MCSubRegIterator : public MCRegisterInfo::DiffListIterator {
public:
MCSubRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
bool IncludeSelf = false) {
init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs);
assert(MCRegister::isPhysicalRegister(Reg.id()));
init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SubRegs);
// Initially, the iterator points to Reg itself.
if (!IncludeSelf)
++*this;
Expand Down Expand Up @@ -647,7 +639,8 @@ class MCSuperRegIterator : public MCRegisterInfo::DiffListIterator {

MCSuperRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
bool IncludeSelf = false) {
init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
assert(MCRegister::isPhysicalRegister(Reg.id()));
init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
// Initially, the iterator points to Reg itself.
if (!IncludeSelf)
++*this;
Expand Down Expand Up @@ -675,6 +668,9 @@ inline bool MCRegisterInfo::isSuperRegister(MCRegister RegA, MCRegister RegB) co
// MCRegUnitIterator enumerates a list of register units for Reg. The list is
// in ascending numerical order.
class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator {
// The value must be kept in sync with RegisterInfoEmitter.cpp.
static constexpr unsigned RegUnitBits = 12;

public:
/// MCRegUnitIterator - Create an iterator that traverses the register units
/// in Reg.
Expand All @@ -685,18 +681,9 @@ class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator {
assert(MCRegister::isPhysicalRegister(Reg.id()));
// Decode the RegUnits MCRegisterDesc field.
unsigned RU = MCRI->get(Reg).RegUnits;
unsigned Scale = RU & 15;
unsigned Offset = RU >> 4;

// Initialize the iterator to Reg * Scale, and the List pointer to
// DiffLists + Offset.
init(Reg * Scale, MCRI->DiffLists + Offset);

// That may not be a valid unit, we need to advance by one to get the real
// unit number. The first differential can be 0 which would normally
// terminate the list, but since we know every register has at least one
// unit, we can allow a 0 differential here.
advance();
unsigned FirstRU = RU & ((1u << RegUnitBits) - 1);
unsigned Offset = RU >> RegUnitBits;
init(FirstRU, MCRI->DiffLists + Offset);
}

MCRegUnitIterator &operator++() {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/StackMaps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
break;
}
I->Size = std::max(I->Size, II->Size);
if (TRI->isSuperRegister(I->Reg, II->Reg))
if (I->Reg && TRI->isSuperRegister(I->Reg, II->Reg))
I->Reg = II->Reg;
II->Reg = 0; // mark for deletion.
}
Expand Down
63 changes: 21 additions & 42 deletions llvm/utils/TableGen/RegisterInfoEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -635,17 +635,16 @@ static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) {
// The initial value depends on the specific list. The list is terminated by a
// 0 differential which means we can't encode repeated elements.

typedef SmallVector<uint16_t, 4> DiffVec;
typedef SmallVector<int16_t, 4> DiffVec;
typedef SmallVector<LaneBitmask, 4> MaskVec;

// Differentially encode a sequence of numbers into V. The starting value and
// terminating 0 are not added to V, so it will have the same size as List.
static
DiffVec &diffEncode(DiffVec &V, unsigned InitVal, SparseBitVector<> List) {
// Fills V with differentials between every two consecutive elements of List.
static DiffVec &diffEncode(DiffVec &V, SparseBitVector<> List) {
assert(V.empty() && "Clear DiffVec before diffEncode.");
uint16_t Val = uint16_t(InitVal);

for (uint16_t Cur : List) {
SparseBitVector<>::iterator I = List.begin(), E = List.end();
unsigned Val = *I;
while (++I != E) {
unsigned Cur = *I;
V.push_back(Cur - Val);
Val = Cur;
}
Expand All @@ -656,18 +655,16 @@ template<typename Iter>
static
DiffVec &diffEncode(DiffVec &V, unsigned InitVal, Iter Begin, Iter End) {
assert(V.empty() && "Clear DiffVec before diffEncode.");
uint16_t Val = uint16_t(InitVal);
unsigned Val = InitVal;
for (Iter I = Begin; I != End; ++I) {
uint16_t Cur = (*I)->EnumValue;
unsigned Cur = (*I)->EnumValue;
V.push_back(Cur - Val);
Val = Cur;
}
return V;
}

static void printDiff16(raw_ostream &OS, uint16_t Val) {
OS << Val;
}
static void printDiff16(raw_ostream &OS, int16_t Val) { OS << Val; }

static void printMask(raw_ostream &OS, LaneBitmask Val) {
OS << "LaneBitmask(0x" << PrintLaneMask(Val) << ')';
Expand Down Expand Up @@ -891,7 +888,6 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
SmallVector<DiffVec, 4> SubRegLists(Regs.size());
SmallVector<DiffVec, 4> SuperRegLists(Regs.size());
SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
SmallVector<unsigned, 4> RegUnitInitScale(Regs.size());

// List of lane masks accompanying register unit sequences.
SequenceToOffsetTable<MaskVec> LaneMaskSeqs;
Expand Down Expand Up @@ -929,31 +925,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
SuperRegList.end());
DiffSeqs.add(SuperRegLists[i]);

// Differentially encode the register unit list, seeded by register number.
// First compute a scale factor that allows more diff-lists to be reused:
//
// D0 -> (S0, S1)
// D1 -> (S2, S3)
//
// A scale factor of 2 allows D0 and D1 to share a diff-list. The initial
// value for the differential decoder is the register number multiplied by
// the scale.
//
// Check the neighboring registers for arithmetic progressions.
unsigned ScaleA = ~0u, ScaleB = ~0u;
SparseBitVector<> RUs = Reg.getNativeRegUnits();
if (I != Regs.begin() &&
std::prev(I)->getNativeRegUnits().count() == RUs.count())
ScaleB = *RUs.begin() - *std::prev(I)->getNativeRegUnits().begin();
if (std::next(I) != Regs.end() &&
std::next(I)->getNativeRegUnits().count() == RUs.count())
ScaleA = *std::next(I)->getNativeRegUnits().begin() - *RUs.begin();
unsigned Scale = std::min(ScaleB, ScaleA);
// Default the scale to 0 if it can't be encoded in 4 bits.
if (Scale >= 16)
Scale = 0;
RegUnitInitScale[i] = Scale;
DiffSeqs.add(diffEncode(RegUnitLists[i], Scale * Reg.EnumValue, RUs));
const SparseBitVector<> &RUs = Reg.getNativeRegUnits();
DiffSeqs.add(diffEncode(RegUnitLists[i], RUs));

const auto &RUMasks = Reg.getRegUnitLaneMasks();
MaskVec &LaneMaskVec = RegUnitLaneMasks[i];
Expand All @@ -978,7 +951,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
const std::string &TargetName = std::string(Target.getName());

// Emit the shared table of differential lists.
OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[] = {\n";
OS << "extern const int16_t " << TargetName << "RegDiffLists[] = {\n";
DiffSeqs.emit(OS, printDiff16);
OS << "};\n\n";

Expand Down Expand Up @@ -1014,10 +987,16 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
// Emit the register descriptors now.
i = 0;
for (const auto &Reg : Regs) {
unsigned FirstRU = Reg.getNativeRegUnits().find_first();
unsigned Offset = DiffSeqs.get(RegUnitLists[i]);
// The value must be kept in sync with MCRegisterInfo.h.
constexpr unsigned RegUnitBits = 12;
assert(isUInt<RegUnitBits>(FirstRU) && "Too many regunits");
assert(isUInt<32 - RegUnitBits>(Offset) && "Offset is too big");
OS << " { " << RegStrings.get(std::string(Reg.getName())) << ", "
<< DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])
<< ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
<< (DiffSeqs.get(RegUnitLists[i]) * 16 + RegUnitInitScale[i]) << ", "
<< (Offset << RegUnitBits | FirstRU) << ", "
<< LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";
++i;
}
Expand Down Expand Up @@ -1651,7 +1630,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,

// Emit the constructor of the class...
OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[];\n";
OS << "extern const int16_t " << TargetName << "RegDiffLists[];\n";
OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[];\n";
OS << "extern const char " << TargetName << "RegStrings[];\n";
OS << "extern const char " << TargetName << "RegClassStrings[];\n";
Expand Down

0 comments on commit 7a25870

Please sign in to comment.