Skip to content

Commit

Permalink
AMDGPU: Improve accuracy of instruction rates for some FP instructions
Browse files Browse the repository at this point in the history
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245774 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
arsenm committed Aug 22, 2015
1 parent ac03979 commit e48caeb
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 13 deletions.
5 changes: 5 additions & 0 deletions lib/Target/AMDGPU/CIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;

let SubtargetPredicate = isCIVI in {

let SchedRW = [WriteDoubleAdd] in {
defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
VOP_F64_F64, ftrunc
>;
Expand All @@ -53,12 +54,16 @@ defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
VOP_F64_F64, frint
>;
} // End SchedRW = [WriteDoubleAdd]

let SchedRW = [WriteQuarterRate32] in {
defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
VOP_F32_F32
>;
defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
VOP_F32_F32
>;
} // End SchedRW = [WriteQuarterRate32]

//===----------------------------------------------------------------------===//
// VOP3 Instructions
Expand Down
29 changes: 22 additions & 7 deletions lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1286,14 +1286,19 @@ defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64",
VOP_F64_F64, fsqrt
>;

} // let SchedRW = [WriteDouble]
} // End SchedRW = [WriteDouble]

let SchedRW = [WriteQuarterRate32] in {

defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
VOP_F32_F32, AMDGPUsin
>;
defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32",
VOP_F32_F32, AMDGPUcos
>;

} // End SchedRW = [WriteQuarterRate32]

defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
Expand All @@ -1302,10 +1307,18 @@ defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c,0x30>, "v_frexp_exp_i32_f64",
VOP_I32_F64
>;

let SchedRW = [WriteDoubleAdd] in {
defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
VOP_F64_F64
>;
defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64", VOP_F64_F64>;

defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64",
VOP_F64_F64
>;
} // End SchedRW = [WriteDoubleAdd]


defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
VOP_I32_F32
>;
Expand Down Expand Up @@ -1337,7 +1350,7 @@ defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32",
VOP_F32_F32, AMDGPUrsq_legacy
>;

} // End let SchedRW = [WriteQuarterRate32]
} // End SchedRW = [WriteQuarterRate32]

let SchedRW = [WriteDouble] in {

Expand Down Expand Up @@ -1698,15 +1711,15 @@ defm V_DIV_FIXUP_F32 : VOP3Inst <
vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
>;

let SchedRW = [WriteDouble] in {
let SchedRW = [WriteDoubleAdd] in {

defm V_DIV_FIXUP_F64 : VOP3Inst <
vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
>;

} // let SchedRW = [WriteDouble]
} // End SchedRW = [WriteDouble]

let SchedRW = [WriteDouble] in {
let SchedRW = [WriteDoubleAdd] in {
let isCommutable = 1 in {

defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64",
Expand All @@ -1729,7 +1742,7 @@ defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
VOP_F64_F64_I32, AMDGPUldexp
>;

} // let SchedRW = [WriteDouble]
} // let SchedRW = [WriteDoubleAdd]

let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {

Expand Down Expand Up @@ -1760,6 +1773,7 @@ defm V_DIV_SCALE_F64 : VOP3b_64 <vop3<0x16e, 0x1e1>, "v_div_scale_f64", []>;

let isCommutable = 1, Uses = [VCC] in {

let SchedRW = [WriteFloatFMA] in {
// v_div_fmas_f32:
// result = src0 * src1 + src2
// if (vcc)
Expand All @@ -1768,6 +1782,7 @@ let isCommutable = 1, Uses = [VCC] in {
defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
>;
}

let SchedRW = [WriteDouble] in {
// v_div_fmas_f64:
Expand Down
6 changes: 3 additions & 3 deletions test/CodeGen/AMDGPU/sint_to_fp.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {

; SI-LABEL: @v_sint_to_fp_i64_to_f64
; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
Expand Down
6 changes: 3 additions & 3 deletions test/CodeGen/AMDGPU/uint_to_fp.f64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone

; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
Expand Down

0 comments on commit e48caeb

Please sign in to comment.