Skip to content

Commit

Permalink
Add a late IR expansion pass for the experimental reduction intrinsics.
Browse files Browse the repository at this point in the history
This pass uses a new target hook to decide whether or not to expand a particular
intrinsic to the shufflevector sequence.

Differential Revision: https://reviews.llvm.org/D32245



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302631 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
aemerson committed May 10, 2017
1 parent a2ef53a commit 0dd30f8
Show file tree
Hide file tree
Showing 16 changed files with 441 additions and 5 deletions.
7 changes: 7 additions & 0 deletions include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -753,6 +753,9 @@ class TargetTransformInfo {
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;

/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
bool shouldExpandReduction(const IntrinsicInst *II) const;
/// @}

private:
Expand Down Expand Up @@ -910,6 +913,7 @@ class TargetTransformInfo::Concept {
VectorType *VecTy) const = 0;
virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
};

template <typename T>
Expand Down Expand Up @@ -1219,6 +1223,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
ReductionFlags Flags) const override {
return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
}
// Forwards the query to the wrapped implementation object (Impl).
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return Impl.shouldExpandReduction(II);
}
};

template <typename T>
Expand Down
4 changes: 4 additions & 0 deletions include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,10 @@ class TargetTransformInfoImplBase {
return false;
}

// Base-class default: unconditionally returns true for any intrinsic \p II.
bool shouldExpandReduction(const IntrinsicInst *II) const {
return true;
}

protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
Expand Down
24 changes: 24 additions & 0 deletions include/llvm/CodeGen/ExpandReductions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===----- ExpandReductions.h - Expand experimental reduction intrinsics --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_EXPANDREDUCTIONS_H
#define LLVM_CODEGEN_EXPANDREDUCTIONS_H

#include "llvm/IR/PassManager.h"

namespace llvm {

/// Pass, built on PassInfoMixin, that expands the experimental reduction
/// intrinsics.
class ExpandReductionsPass
: public PassInfoMixin<ExpandReductionsPass> {
public:
/// Runs the pass over \p F and reports which analyses are preserved.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm

#endif // LLVM_CODEGEN_EXPANDREDUCTIONS_H
4 changes: 4 additions & 0 deletions include/llvm/CodeGen/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,10 @@ namespace llvm {
/// printing assembly.
ModulePass *createMachineOutlinerPass();

/// This pass expands the experimental reduction intrinsics into sequences of
/// shuffles.
FunctionPass *createExpandReductionsPass();

} // End llvm namespace

/// Target machine pass initializer for passes with dependencies. Use with
Expand Down
1 change: 1 addition & 0 deletions include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ void initializeEfficiencySanitizerPass(PassRegistry&);
void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
void initializeExpandISelPseudosPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&);
void initializeExpandReductionsPass(PassRegistry&);
void initializeExternalAAWrapperPassPass(PassRegistry&);
void initializeFEntryInserterPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
Expand Down
6 changes: 6 additions & 0 deletions include/llvm/Transforms/Utils/LoopUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,12 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE = nullptr);

/// Generates a vector reduction using shufflevectors to reduce the value.
Value *getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind
MinMaxKind = RecurrenceDescriptor::MRK_Invalid,
ArrayRef<Value *> RedOps = ArrayRef<Value *>());

/// Create a target reduction of the given vector. The reduction operation
/// is described by the \p Opcode parameter. min/max reductions require
/// additional information supplied in \p Flags.
Expand Down
3 changes: 3 additions & 0 deletions lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,9 @@ bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}

// Delegates the decision to the type-erased target implementation (TTIImpl).
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}

TargetTransformInfo::Concept::~Concept() {}

Expand Down
1 change: 1 addition & 0 deletions lib/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ add_llvm_library(LLVMCodeGen
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
ExpandReductions.cpp
FaultMaps.cpp
FEntryInserter.cpp
FuncletLayout.cpp
Expand Down
167 changes: 167 additions & 0 deletions lib/CodeGen/ExpandReductions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR expansion for reduction intrinsics, allowing targets
// to enable the experimental intrinsics until just before codegen.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExpandReductions.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {

/// Translates a reduction intrinsic ID into the instruction opcode that is
/// handed to the shuffle-reduction helper.
///
/// Arithmetic and bitwise reductions map to their binary operator. Integer
/// min/max reductions map to ICmp and floating-point min/max reductions map
/// to FCmp. Any other ID is treated as unreachable.
unsigned getOpcode(Intrinsic::ID ID) {
  unsigned Opc;
  switch (ID) {
  default:
    llvm_unreachable("Unexpected ID");
  case Intrinsic::experimental_vector_reduce_fadd:
    Opc = Instruction::FAdd;
    break;
  case Intrinsic::experimental_vector_reduce_fmul:
    Opc = Instruction::FMul;
    break;
  case Intrinsic::experimental_vector_reduce_add:
    Opc = Instruction::Add;
    break;
  case Intrinsic::experimental_vector_reduce_mul:
    Opc = Instruction::Mul;
    break;
  case Intrinsic::experimental_vector_reduce_and:
    Opc = Instruction::And;
    break;
  case Intrinsic::experimental_vector_reduce_or:
    Opc = Instruction::Or;
    break;
  case Intrinsic::experimental_vector_reduce_xor:
    Opc = Instruction::Xor;
    break;
  // All integer min/max flavours are built from an integer compare.
  case Intrinsic::experimental_vector_reduce_smax:
  case Intrinsic::experimental_vector_reduce_smin:
  case Intrinsic::experimental_vector_reduce_umax:
  case Intrinsic::experimental_vector_reduce_umin:
    Opc = Instruction::ICmp;
    break;
  // Floating-point min/max are built from a floating-point compare.
  case Intrinsic::experimental_vector_reduce_fmax:
  case Intrinsic::experimental_vector_reduce_fmin:
    Opc = Instruction::FCmp;
    break;
  }
  return Opc;
}

/// Returns the min/max recurrence kind that corresponds to a min/max
/// reduction intrinsic, or MRK_Invalid for every other intrinsic ID.
RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) {
  using RD = RecurrenceDescriptor;
  RD::MinMaxRecurrenceKind Kind = RD::MRK_Invalid;
  switch (ID) {
  case Intrinsic::experimental_vector_reduce_smax:
    Kind = RD::MRK_SIntMax;
    break;
  case Intrinsic::experimental_vector_reduce_smin:
    Kind = RD::MRK_SIntMin;
    break;
  case Intrinsic::experimental_vector_reduce_umax:
    Kind = RD::MRK_UIntMax;
    break;
  case Intrinsic::experimental_vector_reduce_umin:
    Kind = RD::MRK_UIntMin;
    break;
  case Intrinsic::experimental_vector_reduce_fmax:
    Kind = RD::MRK_FloatMax;
    break;
  case Intrinsic::experimental_vector_reduce_fmin:
    Kind = RD::MRK_FloatMin;
    break;
  default:
    // Not a min/max reduction: keep MRK_Invalid.
    break;
  }
  return Kind;
}

/// Expands experimental vector reduction intrinsic calls in \p F into
/// shufflevector-based reduction sequences.
///
/// A call is left untouched when any of the following holds:
///  - it is an fadd/fmul reduction without unsafe-algebra fast-math flags,
///    i.e. an ordered reduction, which the shuffle sequence cannot express;
///  - the vector operand's element count is not a power of two, which the
///    shuffle reduction helper requires;
///  - the target declines through TTI->shouldExpandReduction().
///
/// For fadd/fmul only the vector operand (argument 1) feeds the expansion;
/// argument 0 is not read here.
///
/// \param F   Function whose intrinsic calls are inspected and rewritten.
/// \param TTI Target hook provider used to decide whether to expand.
/// \returns true if at least one intrinsic call was replaced.
bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
  // Gather the candidates up front so that erasing calls below does not
  // interfere with walking the function's instructions.
  SmallVector<IntrinsicInst *, 4> Worklist;
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
    if (auto *II = dyn_cast<IntrinsicInst>(&*I))
      Worklist.push_back(II);

  bool Changed = false;
  for (auto *II : Worklist) {
    Value *Vec = nullptr;
    auto ID = II->getIntrinsicID();
    auto MRK = RecurrenceDescriptor::MRK_Invalid;
    switch (ID) {
    case Intrinsic::experimental_vector_reduce_fadd:
    case Intrinsic::experimental_vector_reduce_fmul:
      // FMFs must be attached to the call, otherwise it's an ordered reduction
      // and it can't be handled by generating this shuffle sequence.
      // TODO: Implement scalarization of ordered reductions here for targets
      // without native support.
      if (!II->getFastMathFlags().unsafeAlgebra())
        continue;
      Vec = II->getArgOperand(1);
      break;
    case Intrinsic::experimental_vector_reduce_add:
    case Intrinsic::experimental_vector_reduce_mul:
    case Intrinsic::experimental_vector_reduce_and:
    case Intrinsic::experimental_vector_reduce_or:
    case Intrinsic::experimental_vector_reduce_xor:
    case Intrinsic::experimental_vector_reduce_smax:
    case Intrinsic::experimental_vector_reduce_smin:
    case Intrinsic::experimental_vector_reduce_umax:
    case Intrinsic::experimental_vector_reduce_umin:
    case Intrinsic::experimental_vector_reduce_fmax:
    case Intrinsic::experimental_vector_reduce_fmin:
      Vec = II->getArgOperand(0);
      MRK = getMRK(ID);
      break;
    default:
      // Not a reduction intrinsic.
      continue;
    }

    // The shuffle reduction halves the live part of the vector on every step
    // and therefore only works for power-of-two element counts. Leave other
    // vector widths alone instead of emitting a sequence that drops lanes.
    unsigned NumElts = Vec->getType()->getVectorNumElements();
    if (NumElts == 0 || (NumElts & (NumElts - 1)) != 0)
      continue;

    if (!TTI->shouldExpandReduction(II))
      continue;

    // Only build an IRBuilder once we know the call is really being expanded;
    // new instructions are inserted right before the intrinsic call.
    IRBuilder<> Builder(II);
    Value *Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
    II->replaceAllUsesWith(Rdx);
    II->eraseFromParent();
    Changed = true;
  }
  return Changed;
}

class ExpandReductions : public FunctionPass {
public:
static char ID;
ExpandReductions() : FunctionPass(ID) {
initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
}

bool runOnFunction(Function &F) override {
const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
return expandReductions(F, TTI);
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.setPreservesCFG();
}
};
}

// Definition of the static pass identifier declared in ExpandReductions.
char ExpandReductions::ID;
// Register the legacy pass under the command-line name "expand-reductions"
// and record its dependency on TargetTransformInfoWrapperPass.
INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
"Expand reduction intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
"Expand reduction intrinsics", false, false)

/// Factory for the legacy pass-manager flavour of the reduction expansion
/// pass; returns a newly allocated ExpandReductions instance.
FunctionPass *llvm::createExpandReductionsPass() {
  FunctionPass *Pass = new ExpandReductions();
  return Pass;
}

/// New pass-manager entry point. Runs the expansion over \p F using the
/// TargetIRAnalysis result. When nothing changed all analyses are preserved;
/// otherwise only the CFG analyses set is marked preserved.
PreservedAnalyses ExpandReductionsPass::run(Function &F,
                                            FunctionAnalysisManager &AM) {
  const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  if (expandReductions(F, &TTI)) {
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }
  return PreservedAnalyses::all();
}
3 changes: 3 additions & 0 deletions lib/CodeGen/TargetPassConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,9 @@ void TargetPassConfig::addIRPasses() {

// Insert calls to mcount-like functions.
addPass(createCountingFunctionInserterPass());

// Expand reduction intrinsics into shuffle sequences if the target wants to.
addPass(createExpandReductionsPass());
}

/// Turn exception handling constructs into something the code generators can
Expand Down
4 changes: 4 additions & 0 deletions lib/Target/AArch64/AArch64TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
unsigned getMinPrefetchStride();

unsigned getMaxPrefetchIterationsAhead();

// AArch64 always answers false here, regardless of the intrinsic \p II.
bool shouldExpandReduction(const IntrinsicInst *II) const {
return false;
}
/// @}
};

Expand Down
9 changes: 4 additions & 5 deletions lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1125,11 +1125,10 @@ static Value *addFastMathFlag(Value *V) {
}

// Helper to generate a log2 shuffle reduction.
static Value *
getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind =
RecurrenceDescriptor::MRK_Invalid,
ArrayRef<Value *> RedOps = ArrayRef<Value *>()) {
Value *
llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
ArrayRef<Value *> RedOps) {
unsigned VF = Src->getType()->getVectorNumElements();
// VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
// and vector ops, reducing the set of values being computed by half each
Expand Down
Loading

0 comments on commit 0dd30f8

Please sign in to comment.