Skip to content

Commit

Permalink
[X86, AVX2] Replace inserti128 and extracti128 intrinsics with generi…
Browse files Browse the repository at this point in the history
…c shuffles

This should complete the job started in r231794 and continued in r232045:
We want to replace as much custom x86 shuffling via intrinsics
as possible because pushing the code down the generic shuffle
optimization path allows for better codegen and less complexity
in LLVM.

AVX2 introduced proper integer variants of the hacked integer insert/extract
C intrinsics that were created for this same functionality with AVX1.

This should complete the removal of insert/extract128 intrinsics.

The Clang precursor patch for this change was checked in at r232109.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232120 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
rotateright committed Mar 12, 2015
1 parent 740aee4 commit cae9695
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 52 deletions.
7 changes: 0 additions & 7 deletions include/llvm/IR/IntrinsicsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1759,13 +1759,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".

// Vector extract and insert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">,
Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty,
llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;

def int_x86_avx512_mask_vextractf32x4_512 :
GCCBuiltin<"__builtin_ia32_extractf32x4_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty,
Expand Down
22 changes: 0 additions & 22 deletions lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4966,28 +4966,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return nullptr;
}
case Intrinsic::x86_avx2_vinserti128: {
EVT DestVT = TLI.getValueType(I.getType());
EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
ElVT.getVectorNumElements();
Res =
DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT,
getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)),
DAG.getConstant(Idx, TLI.getVectorIdxTy()));
setValue(&I, Res);
return nullptr;
}
case Intrinsic::x86_avx2_vextracti128: {
EVT DestVT = TLI.getValueType(I.getType());
uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
DestVT.getVectorNumElements();
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT,
getValue(I.getArgOperand(0)),
DAG.getConstant(Idx, TLI.getVectorIdxTy()));
setValue(&I, Res);
return nullptr;
}
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
Expand Down
8 changes: 6 additions & 2 deletions lib/IR/AutoUpgrade.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx.vinsertf128.pd.256" ||
Name == "x86.avx.vinsertf128.ps.256" ||
Name == "x86.avx.vinsertf128.si.256" ||
Name == "x86.avx2.vinserti128" ||
Name == "x86.avx.vextractf128.pd.256" ||
Name == "x86.avx.vextractf128.ps.256" ||
Name == "x86.avx.vextractf128.si.256" ||
Name == "x86.avx2.vextracti128" ||
Name == "x86.avx.movnt.dq.256" ||
Name == "x86.avx.movnt.pd.256" ||
Name == "x86.avx.movnt.ps.256" ||
Expand Down Expand Up @@ -634,7 +636,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op1, ConstantVector::get(Idxs));
} else if (Name == "llvm.x86.avx.vinsertf128.pd.256" ||
Name == "llvm.x86.avx.vinsertf128.ps.256" ||
Name == "llvm.x86.avx.vinsertf128.si.256") {
Name == "llvm.x86.avx.vinsertf128.si.256" ||
Name == "llvm.x86.avx2.vinserti128") {
Value *Op0 = CI->getArgOperand(0);
Value *Op1 = CI->getArgOperand(1);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Expand Down Expand Up @@ -679,7 +682,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Rep, ConstantVector::get(Idxs2));
} else if (Name == "llvm.x86.avx.vextractf128.pd.256" ||
Name == "llvm.x86.avx.vextractf128.ps.256" ||
Name == "llvm.x86.avx.vextractf128.si.256") {
Name == "llvm.x86.avx.vextractf128.si.256" ||
Name == "llvm.x86.avx2.vextracti128") {
Value *Op0 = CI->getArgOperand(0);
unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
VectorType *VecTy = cast<VectorType>(CI->getType());
Expand Down
4 changes: 1 addition & 3 deletions lib/Target/X86/X86InstrSSE.td
Original file line number Diff line number Diff line change
Expand Up @@ -8595,9 +8595,7 @@ def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
//
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, u8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
Expand Down
23 changes: 22 additions & 1 deletion test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s

define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK: vpblendw
Expand Down Expand Up @@ -62,3 +62,24 @@ define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone


define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
; CHECK-LABEL: test_x86_avx2_vextracti128:
; CHECK: vextracti128

%res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7)
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone


define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx2_vinserti128:
; CHECK: vinserti128

%res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7)
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone

18 changes: 1 addition & 17 deletions test/CodeGen/X86/avx2-intrinsics-x86.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mattr=avx2 | FileCheck %s

define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK: vpackssdw
Expand Down Expand Up @@ -775,22 +775,6 @@ define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly


define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
; CHECK: vextracti128
%res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone


define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
; CHECK: vinserti128
%res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone


define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
; CHECK: vpmaskmovq
%res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
Expand Down

0 comments on commit cae9695

Please sign in to comment.