[CodeGen] Fix incorrect usage of MCPhysReg for diff list elements

The lists contain differences between register numbers, not the register numbers themselves. Since a difference can also be negative, this also changes its type to signed. Changing the type to signed exposed a "bug". For AMDGPU, which has many registers, the first element of a sequence could be as big as ~45k. The value does not fit into int16_t, but fits into uint16_t. The bug didn't show up because of unsigned wrapping and truncation of the Val field in the advance() method. To fix the issue, I changed the way regunit difflists are encoded. The 4-bit 'scale' field of MCRegisterDesc::RegUnit was replaced by 12-bit number of the first regunit, and the first element of each of the lists was removed. The higher 20 bits of RegUnit field contain the initial offset into DiffLists array. AMDGPU has 1'409 regunits (2^12 = 4'096), and the biggest offset is 80'041 (2^20 = 1'048'576). That is, there is enough room. Changing the encoding method also resulted in a smaller array size, the numbers are below (I omitted targets with less than 100 elements). ``` AMDGPU | 80052 | 78741 | -1,6% RISCV | 6498 | 6297 | -3,1% ARM | 4181 | 3966 | -5,1% AArch64 | 2770 | 2592 | -6,4% PPC | 1578 | 1441 | -8,7% Hexagon | 994 | 740 | -25,6% R600 | 508 | 398 | -21,7% VE | 471 | 459 | -2,5% Sparc | 381 | 363 | -4,7% X86 | 326 | 208 | -36,2% Mips | 253 | 200 | -20,9% SystemZ | 186 | 162 | -12,9% ``` Reviewed By: foad, arsenm Differential Revision: https://reviews.llvm.org/D151036
python3kgae · Jun 4, 2023 · 7a25870 · 7a25870
1 parent 9424a54
commit 7a25870
Show file tree

Hide file tree

Showing 3 changed files with 61 additions and 95 deletions.
diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -111,8 +111,8 @@ struct MCRegisterDesc {
   // sub-register in SubRegs.
   uint32_t SubRegIndices;
 
-  // RegUnits - Points to the list of register units. The low 4 bits holds the
-  // Scale, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
+  // Points to the list of register units. The low bits hold the first regunit
+  // number, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
   uint32_t RegUnits;
 
   /// Index into list with lane mask sequences. The sequence contains a lanemask
@@ -161,7 +161,7 @@ class MCRegisterInfo {
   unsigned NumClasses;                        // Number of entries in the array
   unsigned NumRegUnits;                       // Number of regunits.
   const MCPhysReg (*RegUnitRoots)[2];         // Pointer to regunit root table.
-  const MCPhysReg *DiffLists;                 // Pointer to the difflists array
+  const int16_t *DiffLists;                   // Pointer to the difflists array
   const LaneBitmask *RegUnitMaskSequences;    // Pointer to lane mask sequences
                                               // for register units.
   const char *RegStrings;                     // Pointer to the string table.
@@ -194,31 +194,19 @@ class MCRegisterInfo {
   /// Don't use this class directly, use one of the specialized sub-classes
   /// defined below.
   class DiffListIterator {
-    uint16_t Val = 0;
-    const MCPhysReg *List = nullptr;
+    unsigned Val = 0;
+    const int16_t *List = nullptr;
 
   protected:
     /// Create an invalid iterator. Call init() to point to something useful.
     DiffListIterator() = default;
 
-    /// init - Point the iterator to InitVal, decoding subsequent values from
-    /// DiffList. The iterator will initially point to InitVal, sub-classes are
-    /// responsible for skipping the seed value if it is not part of the list.
-    void init(MCPhysReg InitVal, const MCPhysReg *DiffList) {
+    /// Point the iterator to InitVal, decoding subsequent values from DiffList.
+    void init(unsigned InitVal, const int16_t *DiffList) {
       Val = InitVal;
       List = DiffList;
     }
 
-    /// advance - Move to the next list position, return the applied
-    /// differential. This function does not detect the end of the list, that
-    /// is the caller's responsibility (by checking for a 0 return value).
-    MCRegister advance() {
-      assert(isValid() && "Cannot move off the end of the list.");
-      MCPhysReg D = *List++;
-      Val += D;
-      return D;
-    }
-
   public:
     /// isValid - returns true if this iterator is not yet at the end.
     bool isValid() const { return List; }
@@ -228,8 +216,11 @@ class MCRegisterInfo {
 
     /// Pre-increment to move to the next position.
     void operator++() {
+      assert(isValid() && "Cannot move off the end of the list.");
+      int16_t D = *List++;
+      Val += D;
       // The end of the list is encoded as a 0 differential.
-      if (!advance())
+      if (!D)
         List = nullptr;
     }
 
@@ -248,9 +239,9 @@ class MCRegisterInfo {
   protected:
     mc_difflist_iterator(MCRegisterInfo::DiffListIterator Iter) : Iter(Iter) {}
 
-    // Allow conversion between instantiations where valid.
-    mc_difflist_iterator(MCRegister Reg, const MCPhysReg *DiffList) {
-      Iter.init(Reg, DiffList);
+    /// Point the iterator to InitVal, decoding subsequent values from DiffList.
+    void init(unsigned InitVal, const int16_t *DiffList) {
+      Iter.init(InitVal, DiffList);
       Val = *Iter;
     }
 
@@ -287,8 +278,11 @@ class MCRegisterInfo {
     mc_subreg_iterator(MCRegisterInfo::DiffListIterator Iter)
         : mc_difflist_iterator(Iter) {}
     mc_subreg_iterator() = default;
-    mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
-        : mc_difflist_iterator(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs) {}
+
+    mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
+      assert(MCRegister::isPhysicalRegister(Reg.id()));
+      init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SubRegs);
+    }
   };
 
   /// Forward iterator over all super-registers.
@@ -299,9 +293,11 @@ class MCRegisterInfo {
     mc_superreg_iterator(MCRegisterInfo::DiffListIterator Iter)
         : mc_difflist_iterator(Iter) {}
     mc_superreg_iterator() = default;
-    mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
-        : mc_difflist_iterator(Reg,
-                               MCRI->DiffLists + MCRI->get(Reg).SuperRegs) {}
+
+    mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
+      assert(MCRegister::isPhysicalRegister(Reg.id()));
+      init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
+    }
   };
 
   /// Return an iterator range over all sub-registers of \p Reg, excluding \p
@@ -351,16 +347,11 @@ class MCRegisterInfo {
   /// Initialize MCRegisterInfo, called by TableGen
   /// auto-generated routines. *DO NOT USE*.
   void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA,
-                          unsigned PC,
-                          const MCRegisterClass *C, unsigned NC,
-                          const MCPhysReg (*RURoots)[2],
-                          unsigned NRU,
-                          const MCPhysReg *DL,
-                          const LaneBitmask *RUMS,
-                          const char *Strings,
-                          const char *ClassStrings,
-                          const uint16_t *SubIndices,
-                          unsigned NumIndices,
+                          unsigned PC, const MCRegisterClass *C, unsigned NC,
+                          const MCPhysReg (*RURoots)[2], unsigned NRU,
+                          const int16_t *DL, const LaneBitmask *RUMS,
+                          const char *Strings, const char *ClassStrings,
+                          const uint16_t *SubIndices, unsigned NumIndices,
                           const SubRegCoveredBits *SubIdxRanges,
                           const uint16_t *RET) {
     Desc = D;
@@ -598,7 +589,8 @@ class MCSubRegIterator : public MCRegisterInfo::DiffListIterator {
 public:
   MCSubRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
                    bool IncludeSelf = false) {
-    init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs);
+    assert(MCRegister::isPhysicalRegister(Reg.id()));
+    init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SubRegs);
     // Initially, the iterator points to Reg itself.
     if (!IncludeSelf)
       ++*this;
@@ -647,7 +639,8 @@ class MCSuperRegIterator : public MCRegisterInfo::DiffListIterator {
 
   MCSuperRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
                      bool IncludeSelf = false) {
-    init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
+    assert(MCRegister::isPhysicalRegister(Reg.id()));
+    init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
     // Initially, the iterator points to Reg itself.
     if (!IncludeSelf)
       ++*this;
@@ -675,6 +668,9 @@ inline bool MCRegisterInfo::isSuperRegister(MCRegister RegA, MCRegister RegB) co
 // MCRegUnitIterator enumerates a list of register units for Reg. The list is
 // in ascending numerical order.
 class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator {
+  // The value must be kept in sync with RegisterInfoEmitter.cpp.
+  static constexpr unsigned RegUnitBits = 12;
+
 public:
   /// MCRegUnitIterator - Create an iterator that traverses the register units
   /// in Reg.
@@ -685,18 +681,9 @@ class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator {
     assert(MCRegister::isPhysicalRegister(Reg.id()));
     // Decode the RegUnits MCRegisterDesc field.
     unsigned RU = MCRI->get(Reg).RegUnits;
-    unsigned Scale = RU & 15;
-    unsigned Offset = RU >> 4;
-
-    // Initialize the iterator to Reg * Scale, and the List pointer to
-    // DiffLists + Offset.
-    init(Reg * Scale, MCRI->DiffLists + Offset);
-
-    // That may not be a valid unit, we need to advance by one to get the real
-    // unit number. The first differential can be 0 which would normally
-    // terminate the list, but since we know every register has at least one
-    // unit, we can allow a 0 differential here.
-    advance();
+    unsigned FirstRU = RU & ((1u << RegUnitBits) - 1);
+    unsigned Offset = RU >> RegUnitBits;
+    init(FirstRU, MCRI->DiffLists + Offset);
   }
 
   MCRegUnitIterator &operator++() {

diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
@@ -392,7 +392,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
         break;
       }
       I->Size = std::max(I->Size, II->Size);
-      if (TRI->isSuperRegister(I->Reg, II->Reg))
+      if (I->Reg && TRI->isSuperRegister(I->Reg, II->Reg))
         I->Reg = II->Reg;
       II->Reg = 0; // mark for deletion.
     }

diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -635,17 +635,16 @@ static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) {
 // The initial value depends on the specific list. The list is terminated by a
 // 0 differential which means we can't encode repeated elements.
 
-typedef SmallVector<uint16_t, 4> DiffVec;
+typedef SmallVector<int16_t, 4> DiffVec;
 typedef SmallVector<LaneBitmask, 4> MaskVec;
 
-// Differentially encode a sequence of numbers into V. The starting value and
-// terminating 0 are not added to V, so it will have the same size as List.
-static
-DiffVec &diffEncode(DiffVec &V, unsigned InitVal, SparseBitVector<> List) {
+// Fills V with differentials between every two consecutive elements of List.
+static DiffVec &diffEncode(DiffVec &V, SparseBitVector<> List) {
   assert(V.empty() && "Clear DiffVec before diffEncode.");
-  uint16_t Val = uint16_t(InitVal);
-
-  for (uint16_t Cur : List) {
+  SparseBitVector<>::iterator I = List.begin(), E = List.end();
+  unsigned Val = *I;
+  while (++I != E) {
+    unsigned Cur = *I;
     V.push_back(Cur - Val);
     Val = Cur;
   }
@@ -656,18 +655,16 @@ template<typename Iter>
 static
 DiffVec &diffEncode(DiffVec &V, unsigned InitVal, Iter Begin, Iter End) {
   assert(V.empty() && "Clear DiffVec before diffEncode.");
-  uint16_t Val = uint16_t(InitVal);
+  unsigned Val = InitVal;
   for (Iter I = Begin; I != End; ++I) {
-    uint16_t Cur = (*I)->EnumValue;
+    unsigned Cur = (*I)->EnumValue;
     V.push_back(Cur - Val);
     Val = Cur;
   }
   return V;
 }
 
-static void printDiff16(raw_ostream &OS, uint16_t Val) {
-  OS << Val;
-}
+static void printDiff16(raw_ostream &OS, int16_t Val) { OS << Val; }
 
 static void printMask(raw_ostream &OS, LaneBitmask Val) {
   OS << "LaneBitmask(0x" << PrintLaneMask(Val) << ')';
@@ -891,7 +888,6 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   SmallVector<DiffVec, 4> SubRegLists(Regs.size());
   SmallVector<DiffVec, 4> SuperRegLists(Regs.size());
   SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
-  SmallVector<unsigned, 4> RegUnitInitScale(Regs.size());
 
   // List of lane masks accompanying register unit sequences.
   SequenceToOffsetTable<MaskVec> LaneMaskSeqs;
@@ -929,31 +925,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
                SuperRegList.end());
     DiffSeqs.add(SuperRegLists[i]);
 
-    // Differentially encode the register unit list, seeded by register number.
-    // First compute a scale factor that allows more diff-lists to be reused:
-    //
-    //   D0 -> (S0, S1)
-    //   D1 -> (S2, S3)
-    //
-    // A scale factor of 2 allows D0 and D1 to share a diff-list. The initial
-    // value for the differential decoder is the register number multiplied by
-    // the scale.
-    //
-    // Check the neighboring registers for arithmetic progressions.
-    unsigned ScaleA = ~0u, ScaleB = ~0u;
-    SparseBitVector<> RUs = Reg.getNativeRegUnits();
-    if (I != Regs.begin() &&
-        std::prev(I)->getNativeRegUnits().count() == RUs.count())
-      ScaleB = *RUs.begin() - *std::prev(I)->getNativeRegUnits().begin();
-    if (std::next(I) != Regs.end() &&
-        std::next(I)->getNativeRegUnits().count() == RUs.count())
-      ScaleA = *std::next(I)->getNativeRegUnits().begin() - *RUs.begin();
-    unsigned Scale = std::min(ScaleB, ScaleA);
-    // Default the scale to 0 if it can't be encoded in 4 bits.
-    if (Scale >= 16)
-      Scale = 0;
-    RegUnitInitScale[i] = Scale;
-    DiffSeqs.add(diffEncode(RegUnitLists[i], Scale * Reg.EnumValue, RUs));
+    const SparseBitVector<> &RUs = Reg.getNativeRegUnits();
+    DiffSeqs.add(diffEncode(RegUnitLists[i], RUs));
 
     const auto &RUMasks = Reg.getRegUnitLaneMasks();
     MaskVec &LaneMaskVec = RegUnitLaneMasks[i];
@@ -978,7 +951,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   const std::string &TargetName = std::string(Target.getName());
 
   // Emit the shared table of differential lists.
-  OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[] = {\n";
+  OS << "extern const int16_t " << TargetName << "RegDiffLists[] = {\n";
   DiffSeqs.emit(OS, printDiff16);
   OS << "};\n\n";
 
@@ -1014,10 +987,16 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   // Emit the register descriptors now.
   i = 0;
   for (const auto &Reg : Regs) {
+    unsigned FirstRU = Reg.getNativeRegUnits().find_first();
+    unsigned Offset = DiffSeqs.get(RegUnitLists[i]);
+    // The value must be kept in sync with MCRegisterInfo.h.
+    constexpr unsigned RegUnitBits = 12;
+    assert(isUInt<RegUnitBits>(FirstRU) && "Too many regunits");
+    assert(isUInt<32 - RegUnitBits>(Offset) && "Offset is too big");
     OS << "  { " << RegStrings.get(std::string(Reg.getName())) << ", "
        << DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])
        << ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
-       << (DiffSeqs.get(RegUnitLists[i]) * 16 + RegUnitInitScale[i]) << ", "
+       << (Offset << RegUnitBits | FirstRU) << ", "
        << LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";
     ++i;
   }
@@ -1651,7 +1630,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
 
   // Emit the constructor of the class...
   OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
-  OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[];\n";
+  OS << "extern const int16_t " << TargetName << "RegDiffLists[];\n";
   OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[];\n";
   OS << "extern const char " << TargetName << "RegStrings[];\n";
   OS << "extern const char " << TargetName << "RegClassStrings[];\n";