Skip to content

Commit a9edc75

Browse files
committed
AMDGPU: Add num spilled s/vgprs to metadata
This was requested by tools.

Differential Revision: https://reviews.llvm.org/D40321

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319192 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6c826ef commit a9edc75

File tree

6 files changed

+153
-17
lines changed

6 files changed

+153
-17
lines changed

docs/AMDGPUUsage.rst

+10
Original file line number | Diff line number | Diff line change
@@ -1295,6 +1295,16 @@ non-AMD key names should be prefixed by "*vendor-name*.".
12951295
code is capable of
12961296
supporting XNACK. See
12971297
:ref:`amdgpu-target-features`.
1298+
"NumSpilledSGPRs" integer Number of stores from
1299+
a scalar register to
1300+
a register allocator
1301+
created spill
1302+
location.
1303+
"NumSpilledVGPRs" integer Number of stores from
1304+
a vector register to
1305+
a register allocator
1306+
created spill
1307+
location.
12981308
============================ ============== ========= =====================
12991309

13001310
..

include/llvm/Support/AMDGPUMetadata.h

+8
Original file line number | Diff line number | Diff line change
@@ -244,6 +244,10 @@ constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize";
244244
constexpr char IsDynamicCallStack[] = "IsDynamicCallStack";
245245
/// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled.
246246
constexpr char IsXNACKEnabled[] = "IsXNACKEnabled";
247+
/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs.
248+
constexpr char NumSpilledSGPRs[] = "NumSpilledSGPRs";
249+
/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs.
250+
constexpr char NumSpilledVGPRs[] = "NumSpilledVGPRs";
247251
} // end namespace Key
248252

249253
/// \brief In-memory representation of kernel code properties metadata.
@@ -275,6 +279,10 @@ struct Metadata final {
275279
/// \brief True if the generated machine code is capable of supporting XNACK.
276280
/// Optional.
277281
bool mIsXNACKEnabled = false;
282+
/// \brief Number of SGPRs spilled by a wavefront. Optional.
283+
uint16_t mNumSpilledSGPRs = 0;
284+
/// \brief Number of VGPRs spilled by a workitem. Optional.
285+
uint16_t mNumSpilledVGPRs = 0;
278286

279287
/// \brief Default constructor.
280288
Metadata() = default;

lib/Support/AMDGPUMetadata.cpp

+4
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,10 @@ struct MappingTraits<Kernel::CodeProps::Metadata> {
148148
MD.mIsDynamicCallStack, false);
149149
YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled,
150150
MD.mIsXNACKEnabled, false);
151+
YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledSGPRs,
152+
MD.mNumSpilledSGPRs, uint16_t(0));
153+
YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledVGPRs,
154+
MD.mNumSpilledVGPRs, uint16_t(0));
151155
}
152156
};
153157

lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp

+2
Original file line number | Diff line number | Diff line change
@@ -1188,6 +1188,8 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(
11881188
HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
11891189
HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
11901190
HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
1191+
HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
1192+
HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
11911193

11921194
return HSACodeProps;
11931195
}
Original file line number | Diff line number | Diff line change
@@ -1,26 +1,26 @@
11
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
33
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
44

5+
@var = addrspace(1) global float 0.0
6+
57
; CHECK: ---
68
; CHECK: Version: [ 1, 0 ]
7-
89
; CHECK: Kernels:
9-
; CHECK: - Name: test
10-
; CHECK: SymbolName: 'test@kd'
11-
; CHECK: CodeProps:
12-
; CHECK: KernargSegmentSize: 24
13-
; CHECK: GroupSegmentFixedSize: 0
14-
; CHECK: PrivateSegmentFixedSize: 0
15-
; CHECK: KernargSegmentAlign: 8
16-
; CHECK: WavefrontSize: 64
17-
; GFX700: NumSGPRs: 6
18-
; GFX800: NumSGPRs: 96
19-
; GFX900: NumSGPRs: 6
20-
; GFX700: NumVGPRs: 4
21-
; GFX800: NumVGPRs: 6
22-
; GFX900: NumVGPRs: 6
23-
; CHECK: MaxFlatWorkGroupSize: 256
10+
11+
; CHECK: - Name: test
12+
; CHECK: SymbolName: 'test@kd'
13+
; CHECK: CodeProps:
14+
; CHECK: KernargSegmentSize: 24
15+
; CHECK: GroupSegmentFixedSize: 0
16+
; CHECK: PrivateSegmentFixedSize: 0
17+
; CHECK: KernargSegmentAlign: 8
18+
; CHECK: WavefrontSize: 64
19+
; CHECK: NumSGPRs: 6
20+
; GFX700: NumVGPRs: 4
21+
; GFX803: NumVGPRs: 6
22+
; GFX900: NumVGPRs: 6
23+
; CHECK: MaxFlatWorkGroupSize: 256
2424
define amdgpu_kernel void @test(
2525
half addrspace(1)* %r,
2626
half addrspace(1)* %a,
@@ -32,3 +32,111 @@ entry:
3232
store half %r.val, half addrspace(1)* %r
3333
ret void
3434
}
35+
36+
; CHECK: - Name: num_spilled_sgprs
37+
; CHECK: SymbolName: 'num_spilled_sgprs@kd'
38+
; CHECK: CodeProps:
39+
; CHECK: NumSpilledSGPRs: 41
40+
define amdgpu_kernel void @num_spilled_sgprs(
41+
i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %out2,
42+
i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 addrspace(1)* %out5,
43+
i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, i32 addrspace(1)* %out8,
44+
i32 addrspace(1)* %out9, i32 addrspace(1)* %outa, i32 addrspace(1)* %outb,
45+
i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, i32 addrspace(1)* %oute,
46+
i32 addrspace(1)* %outf, i32 %in0, i32 %in1, i32 %in2, i32 %in3, i32 %in4,
47+
i32 %in5, i32 %in6, i32 %in7, i32 %in8, i32 %in9, i32 %ina, i32 %inb,
48+
i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 {
49+
entry:
50+
store i32 %in0, i32 addrspace(1)* %out0
51+
store i32 %in1, i32 addrspace(1)* %out1
52+
store i32 %in2, i32 addrspace(1)* %out2
53+
store i32 %in3, i32 addrspace(1)* %out3
54+
store i32 %in4, i32 addrspace(1)* %out4
55+
store i32 %in5, i32 addrspace(1)* %out5
56+
store i32 %in6, i32 addrspace(1)* %out6
57+
store i32 %in7, i32 addrspace(1)* %out7
58+
store i32 %in8, i32 addrspace(1)* %out8
59+
store i32 %in9, i32 addrspace(1)* %out9
60+
store i32 %ina, i32 addrspace(1)* %outa
61+
store i32 %inb, i32 addrspace(1)* %outb
62+
store i32 %inc, i32 addrspace(1)* %outc
63+
store i32 %ind, i32 addrspace(1)* %outd
64+
store i32 %ine, i32 addrspace(1)* %oute
65+
store i32 %inf, i32 addrspace(1)* %outf
66+
ret void
67+
}
68+
69+
; CHECK: - Name: num_spilled_vgprs
70+
; CHECK: SymbolName: 'num_spilled_vgprs@kd'
71+
; CHECK: CodeProps:
72+
; CHECK: NumSpilledVGPRs: 14
73+
define amdgpu_kernel void @num_spilled_vgprs() #1 {
74+
%val0 = load volatile float, float addrspace(1)* @var
75+
%val1 = load volatile float, float addrspace(1)* @var
76+
%val2 = load volatile float, float addrspace(1)* @var
77+
%val3 = load volatile float, float addrspace(1)* @var
78+
%val4 = load volatile float, float addrspace(1)* @var
79+
%val5 = load volatile float, float addrspace(1)* @var
80+
%val6 = load volatile float, float addrspace(1)* @var
81+
%val7 = load volatile float, float addrspace(1)* @var
82+
%val8 = load volatile float, float addrspace(1)* @var
83+
%val9 = load volatile float, float addrspace(1)* @var
84+
%val10 = load volatile float, float addrspace(1)* @var
85+
%val11 = load volatile float, float addrspace(1)* @var
86+
%val12 = load volatile float, float addrspace(1)* @var
87+
%val13 = load volatile float, float addrspace(1)* @var
88+
%val14 = load volatile float, float addrspace(1)* @var
89+
%val15 = load volatile float, float addrspace(1)* @var
90+
%val16 = load volatile float, float addrspace(1)* @var
91+
%val17 = load volatile float, float addrspace(1)* @var
92+
%val18 = load volatile float, float addrspace(1)* @var
93+
%val19 = load volatile float, float addrspace(1)* @var
94+
%val20 = load volatile float, float addrspace(1)* @var
95+
%val21 = load volatile float, float addrspace(1)* @var
96+
%val22 = load volatile float, float addrspace(1)* @var
97+
%val23 = load volatile float, float addrspace(1)* @var
98+
%val24 = load volatile float, float addrspace(1)* @var
99+
%val25 = load volatile float, float addrspace(1)* @var
100+
%val26 = load volatile float, float addrspace(1)* @var
101+
%val27 = load volatile float, float addrspace(1)* @var
102+
%val28 = load volatile float, float addrspace(1)* @var
103+
%val29 = load volatile float, float addrspace(1)* @var
104+
%val30 = load volatile float, float addrspace(1)* @var
105+
106+
store volatile float %val0, float addrspace(1)* @var
107+
store volatile float %val1, float addrspace(1)* @var
108+
store volatile float %val2, float addrspace(1)* @var
109+
store volatile float %val3, float addrspace(1)* @var
110+
store volatile float %val4, float addrspace(1)* @var
111+
store volatile float %val5, float addrspace(1)* @var
112+
store volatile float %val6, float addrspace(1)* @var
113+
store volatile float %val7, float addrspace(1)* @var
114+
store volatile float %val8, float addrspace(1)* @var
115+
store volatile float %val9, float addrspace(1)* @var
116+
store volatile float %val10, float addrspace(1)* @var
117+
store volatile float %val11, float addrspace(1)* @var
118+
store volatile float %val12, float addrspace(1)* @var
119+
store volatile float %val13, float addrspace(1)* @var
120+
store volatile float %val14, float addrspace(1)* @var
121+
store volatile float %val15, float addrspace(1)* @var
122+
store volatile float %val16, float addrspace(1)* @var
123+
store volatile float %val17, float addrspace(1)* @var
124+
store volatile float %val18, float addrspace(1)* @var
125+
store volatile float %val19, float addrspace(1)* @var
126+
store volatile float %val20, float addrspace(1)* @var
127+
store volatile float %val21, float addrspace(1)* @var
128+
store volatile float %val22, float addrspace(1)* @var
129+
store volatile float %val23, float addrspace(1)* @var
130+
store volatile float %val24, float addrspace(1)* @var
131+
store volatile float %val25, float addrspace(1)* @var
132+
store volatile float %val26, float addrspace(1)* @var
133+
store volatile float %val27, float addrspace(1)* @var
134+
store volatile float %val28, float addrspace(1)* @var
135+
store volatile float %val29, float addrspace(1)* @var
136+
store volatile float %val30, float addrspace(1)* @var
137+
138+
ret void
139+
}
140+
141+
attributes #0 = { "amdgpu-num-sgpr"="14" }
142+
attributes #1 = { "amdgpu-num-vgpr"="20" }

test/MC/AMDGPU/hsa-metadata-kernel-code-props.s

+4
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@
1414
// CHECK: KernargSegmentAlign: 16
1515
// CHECK: WavefrontSize: 64
1616
// CHECK: MaxFlatWorkGroupSize: 256
17+
// CHECK: NumSpilledSGPRs: 1
18+
// CHECK: NumSpilledVGPRs: 1
1719
.amd_amdgpu_hsa_metadata
1820
Version: [ 1, 0 ]
1921
Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ]
@@ -27,4 +29,6 @@
2729
KernargSegmentAlign: 16
2830
WavefrontSize: 64
2931
MaxFlatWorkGroupSize: 256
32+
NumSpilledSGPRs: 1
33+
NumSpilledVGPRs: 1
3034
.end_amd_amdgpu_hsa_metadata

0 commit comments

Comments
 (0)