Skip to content

Commit

Permalink
[X86] Fix a regression introduced by r223641.
Browse files Browse the repository at this point in the history
The permps and permd instructions have their operands swapped compared to the
intrinsic definition. Therefore, they do not fall into the INTR_TYPE_2OP
category.

I did not create a new category for those two, as they are the only one AFAICT
in that case.

<rdar://problem/20108262>


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232085 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
Quentin Colombet committed Mar 12, 2015
1 parent 63a0e36 commit be45e0e
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 6 deletions.
7 changes: 7 additions & 0 deletions lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14711,6 +14711,13 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget
switch (IntNo) {
default: return SDValue(); // Don't custom lower most intrinsics.

case Intrinsic::x86_avx2_permd:
case Intrinsic::x86_avx2_permps:
// Operands intentionally swapped. Mask is last operand to intrinsic,
// but second operand for node/instruction.
return DAG.getNode(X86ISD::VPERMV, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(1));

case Intrinsic::x86_avx512_mask_valign_q_512:
case Intrinsic::x86_avx512_mask_valign_d_512:
// Vector source operands are swapped.
Expand Down
2 changes: 0 additions & 2 deletions lib/Target/X86/X86IntrinsicsInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_permd, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, INTR_TYPE_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
Expand Down
10 changes: 8 additions & 2 deletions test/CodeGen/X86/avx2-intrinsics-x86.ll
Original file line number Diff line number Diff line change
Expand Up @@ -746,15 +746,21 @@ declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly


define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpermd
; Check that the arguments are swapped between the intrinsic definition
; and its lowering. Indeed, the offsets are the first source in
; the instruction.
; CHECK: vpermd %ymm0, %ymm1, %ymm0
%res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly


define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
; CHECK: vpermps
; Check that the arguments are swapped between the intrinsic definition
; and its lowering. Indeed, the offsets are the first source in
; the instruction.
; CHECK: vpermps %ymm0, %ymm1, %ymm0
%res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
Expand Down
4 changes: 2 additions & 2 deletions test/CodeGen/X86/stack-folding-int-avx2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@ define <8 x i32> @stack_fold_permd(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_permd
;CHECK: vpermd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1)
%2 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
ret <8 x i32> %2
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
Expand All @@ -461,7 +461,7 @@ define <8 x float> @stack_fold_permps(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_permps
;CHECK: vpermps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1)
%2 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x float> %a0)
ret <8 x float> %2
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
Expand Down

0 comments on commit be45e0e

Please sign in to comment.