Skip to content

Commit

Permalink
[AMDGPU][MC] Corrected ds_wrxchg2* to support two offsets
Browse files Browse the repository at this point in the history
Fixed bug 28227: https://bugs.llvm.org/show_bug.cgi?id=28227

Reviewers: vpykhtin

Differential Revision: https://reviews.llvm.org/D31808

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300066 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
dpreobra committed Apr 12, 2017
1 parent c2dad95 commit df5e51c
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 7 deletions.
28 changes: 21 additions & 7 deletions lib/Target/AMDGPU/DSInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,20 @@ class DS_1A2D_RET<string opName,
let hasPostISelHook = 1;
}

// DS pseudo-instruction class with one address operand, two data operands and
// a returned value, taking two separate offset operands ($offset0/$offset1).
//   opName - mnemonic string forwarded to DS_Pseudo.
//   rc     - register class of the $vdst result (defaults to VGPR_32).
//   src    - register class of both $data0 and $data1 (defaults to rc).
// NOTE(review): "Off8" presumably refers to 8-bit offsets — confirm against
// the offset0/offset1 operand definitions.
class DS_1A2D_Off8_RET<string opName,
RegisterClass rc = VGPR_32,
RegisterClass src = rc>
: DS_Pseudo<opName,
(outs rc:$vdst),
(ins VGPR_32:$addr, src:$data0, src:$data1, offset0:$offset0, offset1:$offset1, gds:$gds),
"$vdst, $addr, $data0, $data1$offset0$offset1$gds"> {

// The operand list above carries offset0/offset1 instead of a single offset.
// NOTE(review): has_offset is declared outside this view — confirm that 0
// means "no combined offset field" in the encoding classes.
let has_offset = 0;
// Names the asm-matcher conversion routine used for this operand layout.
// NOTE(review): presumably the converter that handles the offset0/offset1
// pair — verify in the AMDGPU asm parser.
let AsmMatchConverter = "cvtDSOffset01";

let hasPostISelHook = 1;
}

class DS_1A_RET<string opName, RegisterClass rc = VGPR_32>
: DS_Pseudo<opName,
(outs rc:$vdst),
Expand Down Expand Up @@ -353,9 +367,9 @@ def DS_MAX_RTN_F32 : DS_1A1D_RET <"ds_max_rtn_f32">,

def DS_WRXCHG_RTN_B32 : DS_1A1D_RET<"ds_wrxchg_rtn_b32">,
AtomicNoRet<"", 1>;
def DS_WRXCHG2_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>,
def DS_WRXCHG2_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>,
AtomicNoRet<"", 1>;
def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>,
def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>,
AtomicNoRet<"", 1>;

def DS_ADD_RTN_U64 : DS_1A1D_RET<"ds_add_rtn_u64", VReg_64>,
Expand Down Expand Up @@ -394,11 +408,11 @@ def DS_MAX_RTN_F64 : DS_1A1D_RET<"ds_max_rtn_f64", VReg_64>,
AtomicNoRet<"ds_max_f64", 1>;

def DS_WRXCHG_RTN_B64 : DS_1A1D_RET<"ds_wrxchg_rtn_b64", VReg_64>,
AtomicNoRet<"ds_wrxchg_b64", 1>;
def DS_WRXCHG2_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>,
AtomicNoRet<"ds_wrxchg2_b64", 1>;
def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>,
AtomicNoRet<"ds_wrxchg2st64_b64", 1>;
AtomicNoRet<"", 1>;
def DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>,
AtomicNoRet<"", 1>;
def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>,
AtomicNoRet<"", 1>;

def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init">;
def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">;
Expand Down
16 changes: 16 additions & 0 deletions test/MC/AMDGPU/ds.s
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,18 @@ ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6
// SICI: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xb8,0xd8,0x02,0x04,0x06,0x08]
// VI: ds_wrxchg2_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0x5c,0xd8,0x02,0x04,0x06,0x08]

ds_wrxchg2_rtn_b32 v[0:1], v0, v0, v0 offset0:127 offset1:255
// SICI: ds_wrxchg2_rtn_b32 v[0:1], v0, v0, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xb8,0xd8,0x00,0x00,0x00,0x00]
// VI: ds_wrxchg2_rtn_b32 v[0:1], v0, v0, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x5c,0xd8,0x00,0x00,0x00,0x00]

ds_wrxchg2st64_rtn_b32 v[8:9] v2, v4, v6
// SICI: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0xbc,0xd8,0x02,0x04,0x06,0x08]
// VI: ds_wrxchg2st64_rtn_b32 v[8:9], v2, v4, v6 ; encoding: [0x00,0x00,0x5e,0xd8,0x02,0x04,0x06,0x08]

ds_wrxchg2st64_rtn_b32 v[0:1], v0, v255, v0 offset0:127 offset1:255
// SICI: ds_wrxchg2st64_rtn_b32 v[0:1], v0, v255, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xbc,0xd8,0x00,0xff,0x00,0x00]
// VI: ds_wrxchg2st64_rtn_b32 v[0:1], v0, v255, v0 offset0:127 offset1:255 ; encoding: [0x7f,0xff,0x5e,0xd8,0x00,0xff,0x00,0x00]

ds_cmpst_rtn_b32 v8, v2, v4, v6
// SICI: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0xc0,0xd8,0x02,0x04,0x06,0x08]
// VI: ds_cmpst_rtn_b32 v8, v2, v4, v6 ; encoding: [0x00,0x00,0x60,0xd8,0x02,0x04,0x06,0x08]
Expand Down Expand Up @@ -444,10 +452,18 @@ ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
// SICI: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xb8,0xd9,0x02,0x04,0x06,0x08]
// VI: ds_wrxchg2_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xdc,0xd8,0x02,0x04,0x06,0x08]

ds_wrxchg2_rtn_b64 v[0:3], v0, v[1:2], v[0:1] offset0:127 offset1:255
// SICI: ds_wrxchg2_rtn_b64 v[0:3], v0, v[1:2], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xb8,0xd9,0x00,0x01,0x00,0x00]
// VI: ds_wrxchg2_rtn_b64 v[0:3], v0, v[1:2], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xdc,0xd8,0x00,0x01,0x00,0x00]

ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7]
// SICI: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xbc,0xd9,0x02,0x04,0x06,0x08]
// VI: ds_wrxchg2st64_rtn_b64 v[8:11], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xde,0xd8,0x02,0x04,0x06,0x08]

ds_wrxchg2st64_rtn_b64 v[0:3], v255, v[0:1], v[0:1] offset0:127 offset1:255
// SICI: ds_wrxchg2st64_rtn_b64 v[0:3], v255, v[0:1], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xbc,0xd9,0xff,0x00,0x00,0x00]
// VI: ds_wrxchg2st64_rtn_b64 v[0:3], v255, v[0:1], v[0:1] offset0:127 offset1:255 ; encoding: [0x7f,0xff,0xde,0xd8,0xff,0x00,0x00,0x00]

ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7]
// SICI: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xc0,0xd9,0x02,0x04,0x06,0x08]
// VI: ds_cmpst_rtn_b64 v[8:9], v2, v[4:5], v[6:7] ; encoding: [0x00,0x00,0xe0,0xd8,0x02,0x04,0x06,0x08]
Expand Down

0 comments on commit df5e51c

Please sign in to comment.