Skip to content

Commit

Permalink
AMDGPU/NFC: Minor clean ups in PAL metadata
Browse files Browse the repository at this point in the history
  - Move PAL metadata definitions to AMDGPUMetadata
  - Make naming consistent with HSA metadata

Differential Revision: https://reviews.llvm.org/D38745


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@315523 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
kzhuravl committed Oct 11, 2017
1 parent 44bc30d commit 2578287
Show file tree
Hide file tree
Showing 16 changed files with 146 additions and 99 deletions.
43 changes: 43 additions & 0 deletions include/llvm/Support/AMDGPUMetadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,49 @@ struct Metadata final {
};

} // end namespace HSAMD

//===----------------------------------------------------------------------===//
// PAL metadata.
//===----------------------------------------------------------------------===//
namespace PALMD {

/// \brief PAL metadata assembler directive.
constexpr char AssemblerDirective[] = ".amd_amdgpu_pal_metadata";

/// \brief PAL metadata keys.
///
/// The keys form three groups (number of used VGPRs, number of used SGPRs,
/// scratch size). Every group lists the LS, HS, ES, GS, VS, PS and CS entries
/// in the same order with consecutive values, so a key in one group can be
/// derived from the matching key in another group by adding a constant
/// offset. The static_asserts below guard that layout.
enum Key : uint32_t {
  LS_NUM_USED_VGPRS = 0x10000015,
  HS_NUM_USED_VGPRS = 0x10000016,
  ES_NUM_USED_VGPRS = 0x10000017,
  GS_NUM_USED_VGPRS = 0x10000018,
  VS_NUM_USED_VGPRS = 0x10000019,
  PS_NUM_USED_VGPRS = 0x1000001a,
  CS_NUM_USED_VGPRS = 0x1000001b,

  LS_NUM_USED_SGPRS = 0x1000001c,
  HS_NUM_USED_SGPRS = 0x1000001d,
  ES_NUM_USED_SGPRS = 0x1000001e,
  GS_NUM_USED_SGPRS = 0x1000001f,
  VS_NUM_USED_SGPRS = 0x10000020,
  PS_NUM_USED_SGPRS = 0x10000021,
  CS_NUM_USED_SGPRS = 0x10000022,

  LS_SCRATCH_SIZE = 0x10000038,
  HS_SCRATCH_SIZE = 0x10000039,
  ES_SCRATCH_SIZE = 0x1000003a,
  GS_SCRATCH_SIZE = 0x1000003b,
  VS_SCRATCH_SIZE = 0x1000003c,
  PS_SCRATCH_SIZE = 0x1000003d,
  CS_SCRATCH_SIZE = 0x1000003e
};

// Users compute the *_NUM_USED_VGPRS / *_NUM_USED_SGPRS key of a stage from
// its *_SCRATCH_SIZE key using the VS offsets. Fail the build if a key is ever
// added or renumbered in a way that breaks this relationship for any stage.
static_assert(LS_SCRATCH_SIZE - LS_NUM_USED_VGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_VGPRS &&
                  LS_SCRATCH_SIZE - LS_NUM_USED_SGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_SGPRS,
              "LS PAL metadata keys do not match the VS key layout");
static_assert(HS_SCRATCH_SIZE - HS_NUM_USED_VGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_VGPRS &&
                  HS_SCRATCH_SIZE - HS_NUM_USED_SGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_SGPRS,
              "HS PAL metadata keys do not match the VS key layout");
static_assert(ES_SCRATCH_SIZE - ES_NUM_USED_VGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_VGPRS &&
                  ES_SCRATCH_SIZE - ES_NUM_USED_SGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_SGPRS,
              "ES PAL metadata keys do not match the VS key layout");
static_assert(GS_SCRATCH_SIZE - GS_NUM_USED_VGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_VGPRS &&
                  GS_SCRATCH_SIZE - GS_NUM_USED_SGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_SGPRS,
              "GS PAL metadata keys do not match the VS key layout");
static_assert(PS_SCRATCH_SIZE - PS_NUM_USED_VGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_VGPRS &&
                  PS_SCRATCH_SIZE - PS_NUM_USED_SGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_SGPRS,
              "PS PAL metadata keys do not match the VS key layout");
static_assert(CS_SCRATCH_SIZE - CS_NUM_USED_VGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_VGPRS &&
                  CS_SCRATCH_SIZE - CS_NUM_USED_SGPRS ==
                      VS_SCRATCH_SIZE - VS_NUM_USED_SGPRS,
              "CS PAL metadata keys do not match the VS key layout");

/// \brief PAL metadata represented as a vector.
///
/// Producers in this change push entries as alternating key, value words.
using Metadata = std::vector<uint32_t>;

/// \brief Converts \p PALMetadata to \p String.
///
/// \returns An error code describing a conversion failure, or a
/// default-constructed (success) error code otherwise.
std::error_code toString(const Metadata &PALMetadata, std::string &String);

} // end namespace PALMD
} // end namespace AMDGPU
} // end namespace llvm

Expand Down
15 changes: 15 additions & 0 deletions lib/Support/AMDGPUMetadata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/Twine.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/YAMLTraits.h"

Expand Down Expand Up @@ -213,5 +214,19 @@ std::error_code Metadata::toYamlString(
}

} // end namespace HSAMD

namespace PALMD {

std::error_code toString(const Metadata &PALMetadata, std::string &String) {
raw_string_ostream Stream(String);
for (auto I = PALMetadata.begin(), E = PALMetadata.end(); I != E; ++I) {
Stream << Twine(I == PALMetadata.begin() ? " 0x" : ",0x");
Stream << Twine::utohexstr(*I);
}
Stream.flush();
return std::error_code();
}

} // end namespace PALMD
} // end namespace AMDGPU
} // end namespace llvm
81 changes: 42 additions & 39 deletions lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,13 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"

using namespace llvm;
using namespace llvm::AMDGPU;

// TODO: This should get the default rounding mode from the kernel. We just set
// the default here, but this could change if the OpenCL rounding mode pragmas
Expand Down Expand Up @@ -114,7 +116,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
AMDGPU::IsaInfo::getIsaVersion(getSTI()->getFeatureBits());

if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
readPalMetadata(M);
readPALMetadata(M);
// AMDPAL wants an HSA_ISA .note.
getTargetStreamer().EmitDirectiveHSACodeObjectISA(
ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
Expand All @@ -132,12 +134,12 @@ void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDPAL) {
// Copy the PAL metadata from the map where we collected it into a vector,
// then write it as a .note.
std::vector<uint32_t> Data;
for (auto i : PalMetadata) {
Data.push_back(i.first);
Data.push_back(i.second);
PALMD::Metadata PALMetadataVector;
for (auto i : PALMetadataMap) {
PALMetadataVector.push_back(i.first);
PALMetadataVector.push_back(i.second);
}
getTargetStreamer().EmitPalMetadata(Data);
getTargetStreamer().EmitPALMetadata(PALMetadataVector);
}

if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
Expand Down Expand Up @@ -207,11 +209,11 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) {
}

// For the amdpal OS type, read the amdgpu.pal.metadata supplied by the
// frontend into our PalMetadata map, ready for per-function modification. It
// frontend into our PALMetadataMap, ready for per-function modification. It
// is a NamedMD containing an MDTuple containing a number of MDNodes each of
// which is an integer value, and each two integer values forms a key=value
// pair that we store as PalMetadata[key]=value in the map.
void AMDGPUAsmPrinter::readPalMetadata(Module &M) {
// pair that we store as PALMetadataMap[key]=value in the map.
void AMDGPUAsmPrinter::readPALMetadata(Module &M) {
auto NamedMD = M.getNamedMetadata("amdgpu.pal.metadata");
if (!NamedMD || !NamedMD->getNumOperands())
return;
Expand All @@ -223,7 +225,7 @@ void AMDGPUAsmPrinter::readPalMetadata(Module &M) {
auto Val = mdconst::dyn_extract<ConstantInt>(Tuple->getOperand(I + 1));
if (!Key || !Val)
continue;
PalMetadata[Key->getZExtValue()] = Val->getZExtValue();
PALMetadataMap[Key->getZExtValue()] = Val->getZExtValue();
}
}

Expand Down Expand Up @@ -270,7 +272,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}

if (STM.isAmdPalOS())
EmitPalMetadata(MF, CurrentProgramInfo);
EmitPALMetadata(MF, CurrentProgramInfo);
if (!STM.isAmdHsaOS()) {
EmitProgramInfoSI(MF, CurrentProgramInfo);
}
Expand Down Expand Up @@ -964,10 +966,10 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,

// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
// is AMDPAL. It stores each compute/SPI register setting and other PAL
// metadata items into the PalMetadata map, combining with any provided by the
// frontend as LLVM metadata. Once all functions are written, PalMetadata is
// metadata items into the PALMetadataMap, combining with any provided by the
// frontend as LLVM metadata. Once all functions are written, PALMetadataMap is
// then written as a single block in the .note section.
void AMDGPUAsmPrinter::EmitPalMetadata(const MachineFunction &MF,
void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &CurrentProgramInfo) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// Given the calling convention, calculate the register number for rsrc1. In
Expand All @@ -981,52 +983,53 @@ void AMDGPUAsmPrinter::EmitPalMetadata(const MachineFunction &MF,
// Also calculate the PAL metadata key for *S_SCRATCH_SIZE. It can be used
// with a constant offset to access any non-register shader-specific PAL
// metadata key.
unsigned ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_CS_SCRATCH_SIZE;
unsigned ScratchSizeKey = PALMD::Key::CS_SCRATCH_SIZE;
switch (MF.getFunction()->getCallingConv()) {
case CallingConv::AMDGPU_PS:
ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_PS_SCRATCH_SIZE;
ScratchSizeKey = PALMD::Key::PS_SCRATCH_SIZE;
break;
case CallingConv::AMDGPU_VS:
ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE;
ScratchSizeKey = PALMD::Key::VS_SCRATCH_SIZE;
break;
case CallingConv::AMDGPU_GS:
ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_GS_SCRATCH_SIZE;
ScratchSizeKey = PALMD::Key::GS_SCRATCH_SIZE;
break;
case CallingConv::AMDGPU_ES:
ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_ES_SCRATCH_SIZE;
ScratchSizeKey = PALMD::Key::ES_SCRATCH_SIZE;
break;
case CallingConv::AMDGPU_HS:
ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_HS_SCRATCH_SIZE;
ScratchSizeKey = PALMD::Key::HS_SCRATCH_SIZE;
break;
case CallingConv::AMDGPU_LS:
ScratchSizeKey = AMDGPU::ElfNote::AMDGPU_PAL_METADATA_LS_SCRATCH_SIZE;
ScratchSizeKey = PALMD::Key::LS_SCRATCH_SIZE;
break;
}
unsigned NumUsedVgprsKey = ScratchSizeKey
+ AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_NUM_USED_VGPRS
- AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE;
unsigned NumUsedSgprsKey = ScratchSizeKey
+ AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_NUM_USED_SGPRS
- AMDGPU::ElfNote::AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE;
PalMetadata[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU;
PalMetadata[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU;
unsigned NumUsedVgprsKey = ScratchSizeKey +
PALMD::Key::VS_NUM_USED_VGPRS - PALMD::Key::VS_SCRATCH_SIZE;
unsigned NumUsedSgprsKey = ScratchSizeKey +
PALMD::Key::VS_NUM_USED_SGPRS - PALMD::Key::VS_SCRATCH_SIZE;
PALMetadataMap[NumUsedVgprsKey] = CurrentProgramInfo.NumVGPRsForWavesPerEU;
PALMetadataMap[NumUsedSgprsKey] = CurrentProgramInfo.NumSGPRsForWavesPerEU;
if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
PalMetadata[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1;
PalMetadata[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2;
PALMetadataMap[Rsrc1Reg] |= CurrentProgramInfo.ComputePGMRSrc1;
PALMetadataMap[Rsrc2Reg] |= CurrentProgramInfo.ComputePGMRSrc2;
// ScratchSize is in bytes, 16 aligned.
PalMetadata[ScratchSizeKey] |= alignTo(CurrentProgramInfo.ScratchSize, 16);
PALMetadataMap[ScratchSizeKey] |=
alignTo(CurrentProgramInfo.ScratchSize, 16);
} else {
PalMetadata[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks)
| S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks);
PALMetadataMap[Rsrc1Reg] |= S_00B028_VGPRS(CurrentProgramInfo.VGPRBlocks) |
S_00B028_SGPRS(CurrentProgramInfo.SGPRBlocks);
if (CurrentProgramInfo.ScratchBlocks > 0)
PalMetadata[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1);
PALMetadataMap[Rsrc2Reg] |= S_00B84C_SCRATCH_EN(1);
// ScratchSize is in bytes, 16 aligned.
PalMetadata[ScratchSizeKey] |= alignTo(CurrentProgramInfo.ScratchSize, 16);
PALMetadataMap[ScratchSizeKey] |=
alignTo(CurrentProgramInfo.ScratchSize, 16);
}
if (MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_PS) {
PalMetadata[Rsrc2Reg] |= S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
PalMetadata[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable();
PalMetadata[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr();
PALMetadataMap[Rsrc2Reg] |=
S_00B02C_EXTRA_LDS_SIZE(CurrentProgramInfo.LDSBlocks);
PALMetadataMap[R_0286CC_SPI_PS_INPUT_ENA / 4] |= MFI->getPSInputEnable();
PALMetadataMap[R_0286D0_SPI_PS_INPUT_ADDR / 4] |= MFI->getPSInputAddr();
}
}

Expand Down
10 changes: 6 additions & 4 deletions lib/Target/AMDGPU/AMDGPUAsmPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ class AMDGPUAsmPrinter final : public AsmPrinter {

SIProgramInfo CurrentProgramInfo;
DenseMap<const Function *, SIFunctionResourceInfo> CallGraphResourceInfo;
std::map<uint32_t, uint32_t> PalMetadata;
std::map<uint32_t, uint32_t> PALMetadataMap;

uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
SIFunctionResourceInfo analyzeResourceUsage(const MachineFunction &MF) const;

void readPalMetadata(Module &M);
void readPALMetadata(Module &M);
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
Expand All @@ -128,8 +128,10 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
/// \brief Emit register usage information so that the GPU driver
/// can correctly setup the GPU state.
void EmitProgramInfoR600(const MachineFunction &MF);
void EmitProgramInfoSI(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
void EmitPalMetadata(const MachineFunction &MF, const SIProgramInfo &KernelInfo);
void EmitProgramInfoSI(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);
void EmitPALMetadata(const MachineFunction &MF,
const SIProgramInfo &KernelInfo);
void emitCommonFunctionComments(uint32_t NumVGPR,
uint32_t NumSGPR,
uint32_t ScratchSize,
Expand Down
26 changes: 0 additions & 26 deletions lib/Target/AMDGPU/AMDGPUPTNote.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,32 +44,6 @@ enum NoteType{
NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
};

enum NoteAmdGpuPalMetadataKey {
AMDGPU_PAL_METADATA_LS_NUM_USED_VGPRS = 0x10000015,
AMDGPU_PAL_METADATA_HS_NUM_USED_VGPRS = 0x10000016,
AMDGPU_PAL_METADATA_ES_NUM_USED_VGPRS = 0x10000017,
AMDGPU_PAL_METADATA_GS_NUM_USED_VGPRS = 0x10000018,
AMDGPU_PAL_METADATA_VS_NUM_USED_VGPRS = 0x10000019,
AMDGPU_PAL_METADATA_PS_NUM_USED_VGPRS = 0x1000001a,
AMDGPU_PAL_METADATA_CS_NUM_USED_VGPRS = 0x1000001b,

AMDGPU_PAL_METADATA_LS_NUM_USED_SGPRS = 0x1000001c,
AMDGPU_PAL_METADATA_HS_NUM_USED_SGPRS = 0x1000001d,
AMDGPU_PAL_METADATA_ES_NUM_USED_SGPRS = 0x1000001e,
AMDGPU_PAL_METADATA_GS_NUM_USED_SGPRS = 0x1000001f,
AMDGPU_PAL_METADATA_VS_NUM_USED_SGPRS = 0x10000020,
AMDGPU_PAL_METADATA_PS_NUM_USED_SGPRS = 0x10000021,
AMDGPU_PAL_METADATA_CS_NUM_USED_SGPRS = 0x10000022,

AMDGPU_PAL_METADATA_LS_SCRATCH_SIZE = 0x10000038,
AMDGPU_PAL_METADATA_HS_SCRATCH_SIZE = 0x10000039,
AMDGPU_PAL_METADATA_ES_SCRATCH_SIZE = 0x1000003a,
AMDGPU_PAL_METADATA_GS_SCRATCH_SIZE = 0x1000003b,
AMDGPU_PAL_METADATA_VS_SCRATCH_SIZE = 0x1000003c,
AMDGPU_PAL_METADATA_PS_SCRATCH_SIZE = 0x1000003d,
AMDGPU_PAL_METADATA_CS_SCRATCH_SIZE = 0x1000003e,
};

}
}

Expand Down
22 changes: 13 additions & 9 deletions lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,9 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ParseDirectiveAMDKernelCodeT();
bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
bool ParseDirectiveAMDGPUHsaKernel();
bool ParseDirectivePalMetadata();

bool ParseDirectivePALMetadata();

bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
RegisterKind RegKind, unsigned Reg1,
unsigned RegNum);
Expand Down Expand Up @@ -2493,18 +2495,20 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
return false;
}

bool AMDGPUAsmParser::ParseDirectivePalMetadata() {
std::vector<uint32_t> Data;
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
PALMD::Metadata PALMetadata;
for (;;) {
uint32_t Value;
if (ParseAsAbsoluteExpression(Value))
return TokError("invalid value in .amdgpu_pal_metadata");
Data.push_back(Value);
if (ParseAsAbsoluteExpression(Value)) {
return TokError(Twine("invalid value in ") +
Twine(PALMD::AssemblerDirective));
}
PALMetadata.push_back(Value);
if (getLexer().isNot(AsmToken::Comma))
break;
Lex();
}
getTargetStreamer().EmitPalMetadata(Data);
getTargetStreamer().EmitPALMetadata(PALMetadata);
return false;
}

Expand All @@ -2526,8 +2530,8 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".amdgpu_hsa_kernel")
return ParseDirectiveAMDGPUHsaKernel();

if (IDVal == ".amdgpu_pal_metadata")
return ParseDirectivePalMetadata();
if (IDVal == PALMD::AssemblerDirective)
return ParseDirectivePALMetadata();

return true;
}
Expand Down
21 changes: 12 additions & 9 deletions lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,14 @@ bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(StringRef YamlString) {
return true;
}

bool AMDGPUTargetAsmStreamer::EmitPalMetadata(ArrayRef<uint32_t> Data) {
OS << "\t.amdgpu_pal_metadata";
for (auto I = Data.begin(), E = Data.end(); I != E; ++I)
OS << (I == Data.begin() ? " 0x" : ",0x") << Twine::utohexstr(*I);
OS << "\n";
bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
const PALMD::Metadata &PALMetadata) {
std::string PALMetadataString;
auto Error = PALMD::toString(PALMetadata, PALMetadataString);
if (Error)
return false;

OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n';
return true;
}

Expand Down Expand Up @@ -239,15 +242,15 @@ bool AMDGPUTargetELFStreamer::EmitHSAMetadata(StringRef YamlString) {
return true;
}

bool AMDGPUTargetELFStreamer::EmitPalMetadata(ArrayRef<uint32_t> Data) {
bool AMDGPUTargetELFStreamer::EmitPALMetadata(
const PALMD::Metadata &PALMetadata) {
EmitAMDGPUNote(
MCConstantExpr::create(Data.size() * sizeof(uint32_t), getContext()),
MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
ElfNote::NT_AMDGPU_PAL_METADATA,
[&](MCELFStreamer &OS){
for (auto I : Data)
for (auto I : PALMetadata)
OS.EmitIntValue(I, sizeof(uint32_t));
}
);
return true;
}

Loading

0 comments on commit 2578287

Please sign in to comment.