Skip to content

Commit

Permalink
Remove cloning of functions for quantization and profiling (pytorch#2793
Browse files Browse the repository at this point in the history
)

*Description*: As discussed in pytorch#2700, this removes cloning the function for quantization and profiling. If the caller wants the original function after quantization/profiling it should clone the function prior to calling `quantizeFunction()`/`profileQuantization()`.

*Testing*: All tests still pass.
  • Loading branch information
jfix71 authored Apr 25, 2019
1 parent bbaba50 commit 790da69
Show file tree
Hide file tree
Showing 12 changed files with 92 additions and 159 deletions.
11 changes: 3 additions & 8 deletions examples/fr2en.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ struct Model {
::lower(F_, &loweredMap_);

// Instrument the graph to capture profiles for nodes' outputs.
F_ = glow::profileQuantization(bindings, F_);
glow::profileQuantization(bindings, F_);
}

// Load the quantization profile and transform the graph.
Expand All @@ -174,13 +174,8 @@ struct Model {
deserializeFromYaml(loadProfileFileOpt)};

// Quantize the graph based on the captured profile.
auto *Q = quantization::quantizeFunction(F_, quantConfig,
*EE_.getBackend(), loweredMap_);

// Erase the original function so that the redundant variables that are
// only referenced by the original function will be removed.
Q->getParent()->eraseFunction(F_);
F_ = Q;
quantization::quantizeFunction(F_, quantConfig, *EE_.getBackend(),
loweredMap_);
}

// Do not create constants if we're profiling; the newly allocated histogram
Expand Down
7 changes: 2 additions & 5 deletions include/glow/Optimizer/Optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,8 @@ void convertPlaceholdersToConstants(Function *F,

/// Instrument function \p F by inserting quantization profile nodes for
/// capturing stats for quantization. The nodes will refer to tensors allocated
/// in in context \p bindings. The new quantized function is called \p
/// newFuncName. If no name is given the method will generate a name. \returns
/// a new function with the added quantization nodes.
Function *profileQuantization(PlaceholderBindings &bindings, Function *F,
llvm::StringRef newFuncName = "");
/// in context \p bindings.
void profileQuantization(PlaceholderBindings &bindings, Function *F);

/// Helper to generate and optimize IR from given Function \p F. \p
/// shouldShareBuffers signifies whether to use the share buffers optimization.
Expand Down
20 changes: 8 additions & 12 deletions include/glow/Quantization/Quantization.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,15 @@ std::vector<NodeQuantizationInfo> generateNodeQuantizationInfos(
const LoweredInfoMap &loweredMap = {}, Schema schema = Schema::Asymmetric,
ElemKind quantizationPrecision = ElemKind::Int8QTy);

/// Quantizes the function \p F into a new unoptimized partially quantized
/// function based on configuration from \p quantConfig. This method converts to
/// integer as many nodes as permitted by the backend \p B.
/// Quantizes the function \p F into an unoptimized partially quantized function
/// based on configuration from \p quantConfig. This method converts to integer
/// as many nodes as permitted by the backend \p B. \p loweredMap contains info
/// about what nodes were lowered from what, to be used during quantization.
/// \p doNotQuantizeKinds lists kinds to not quantize, even if a profile was
/// gathered for them and the backend supports the quantized operation. This
/// method clones original function \p F and caller is responsible for cleaning
/// up/erasing original function \p F if needed. \returns a new quantized
/// function.
Function *quantizeFunction(Function *F,
const QuantizationConfiguration &quantConfig,
const Backend &B,
const LoweredInfoMap &loweredMap = {},
const KindSet &doNotQuantizeKinds = {});
/// gathered for them and the backend supports the quantized operation.
void quantizeFunction(Function *F, const QuantizationConfiguration &quantConfig,
const Backend &B, const LoweredInfoMap &loweredMap = {},
const KindSet &doNotQuantizeKinds = {});

} // namespace quantization
} // namespace glow
Expand Down
10 changes: 3 additions & 7 deletions lib/Onnxifi/InlineOnnxifi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,20 +61,16 @@ InlineGraph::initGraph(const void *onnxModel, size_t onnxModelSize,
if (quantizationStep_ == OnnxifiQuantizationStep::Profile) {
lower(function_, &loweredMap_, executionEngine_.getBackend());
PlaceholderBindings dummyCtx;
function_ = profileQuantization(dummyCtx, function_);
profileQuantization(dummyCtx, function_);
}

// -- Quantize --
if (quantizationStep_ == OnnxifiQuantizationStep::Quantize) {
quantization::QuantizationConfiguration quantConfig{
deserializeFromYaml(getProfileFile(modelHash_))};
quantConfig.schema = quantization::Schema::Symmetric;
quantConfig.newFuncName = function_->getName();
function_->setName("old");
auto *Q = quantization::quantizeFunction(
function_, quantConfig, *executionEngine_.getBackend(), loweredMap_);
Q->getParent()->eraseFunction(function_);
function_ = Q;
quantization::quantizeFunction(function_, quantConfig,
*executionEngine_.getBackend(), loweredMap_);
}

executionEngine_.compile(CompilationMode::Infer, function_);
Expand Down
23 changes: 5 additions & 18 deletions lib/Optimizer/Quantization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,13 @@

using namespace glow;

Function *glow::profileQuantization(PlaceholderBindings &bindings, Function *F,
llvm::StringRef newFuncName) {
// Create a new name for the differentiated function, if none is given.
std::string tmpName;
if (newFuncName.empty()) {
tmpName = std::string(F->getName()) + "_profile";
newFuncName = tmpName;
}

// Clone the function.
Function *G = F->clone(newFuncName);

void glow::profileQuantization(PlaceholderBindings &bindings, Function *F) {
// Iterate over all nodes in the graph and insert QuantizationProfile nodes
// to observe tensor values from every node's output.
std::unordered_set<NodeValue> nodesToInstrument;

// Add Quantization Profile node to all of the floating point outputs.
for (auto &node : G->getNodes()) {
for (auto &node : F->getNodes()) {
for (unsigned i = 0, e = node.getNumResults(); i < e; ++i) {
if (node.getElementType(i) != ElemKind::FloatTy) {
continue;
Expand All @@ -50,25 +39,23 @@ Function *glow::profileQuantization(PlaceholderBindings &bindings, Function *F,
}

// Add Quantization Profile node to all floating point vars.
for (const auto &var : G->getParent()->getConstants()) {
for (const auto &var : F->getParent()->getConstants()) {
if (var->getOutput().getElementType() != ElemKind::FloatTy) {
continue;
}
nodesToInstrument.insert(var->getOutput());
}

// Add Quantization Profile node to all floating point placeholders.
for (const auto &PH : G->getParent()->getPlaceholders()) {
for (const auto &PH : F->getParent()->getPlaceholders()) {
if (PH->getOutput().getElementType() != ElemKind::FloatTy) {
continue;
}
nodesToInstrument.insert(PH->getOutput());
}

for (const auto &NV : nodesToInstrument) {
G->createQuantizationProfile(bindings,
F->createQuantizationProfile(bindings,
"QP_" + NV.getNode()->getName().str(), NV);
}

return G;
}
14 changes: 4 additions & 10 deletions lib/Quantization/Quantization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -898,26 +898,20 @@ generateNodeQuantizationInfos(PlaceholderBindings &bindings, const Function *F,
return quantizationInfos;
}

Function *quantizeFunction(Function *F,
const QuantizationConfiguration &quantConfig,
const Backend &B, const LoweredInfoMap &loweredMap,
const KindSet &doNotQuantizeKinds) {
void quantizeFunction(Function *F, const QuantizationConfiguration &quantConfig,
const Backend &B, const LoweredInfoMap &loweredMap,
const KindSet &doNotQuantizeKinds) {
assert((quantConfig.precision == ElemKind::Int8QTy ||
quantConfig.precision == ElemKind::Int16QTy) &&
"Only Int8 and Int16 quantization supported");
Function *G = F->clone(quantConfig.newFuncName.empty()
? F->getName().str() + "_quantized"
: quantConfig.newFuncName);

FunctionQuantizer quantizer(*G, B, quantConfig.schema, quantConfig.infos,
FunctionQuantizer quantizer(*F, B, quantConfig.schema, quantConfig.infos,
quantConfig.precision, doNotQuantizeKinds,
loweredMap, quantConfig.assertAllNodesQuantized);
quantizer.convert();
if (quantConfig.enableRowwise) {
quantizer.enableRowwise();
}

return G;
}

} // namespace quantization
Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/BackendTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ TEST(Interpreter, profileQuantizationForANetwork) {
O = F->createRELU("relu", O);
O = F->createRegression("reg", O, Ex);

F = ::glow::profileQuantization(ctx, F);
::glow::profileQuantization(ctx, F);

ctx.allocate(A);
ctx.allocate(Ex);
Expand Down
30 changes: 15 additions & 15 deletions tests/unittests/BackendTestUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,16 @@ static Placeholder *createQuantizedPlaceholder(Module &mod,
/// Clone, profile, and run \p origF given the \p bindings and \p EE. \returns
/// the quantization parameters from the profile, generated from a lowered
/// clone of \p origF using the specified \p schema.
static std::vector<NodeQuantizationInfo> profileAndGetNodeQuantizationInfo(
PlaceholderBindings &bindings, ExecutionEngine &EE, Function *origF,
const LoweredInfoMap &loweredMap, quantization::Schema schema) {
Function *profileF = glow::profileQuantization(bindings, origF);
static std::vector<NodeQuantizationInfo>
profileAndGetNodeQuantizationInfo(PlaceholderBindings &bindings,
ExecutionEngine &EE, Function *origF,
quantization::Schema schema) {
// Lower everything for profiling in a cloned PF, keeping track of lowered
// info in loweredMap, which is then used when generating QI.
Function *profileF = origF->clone("profile");
LoweredInfoMap loweredMap;
lower(profileF, &loweredMap, EE.getBackend());
glow::profileQuantization(bindings, profileF);
EE.compile(CompilationMode::Infer, profileF);

EE.run(bindings);
Expand All @@ -81,14 +87,8 @@ static void profileAndQuantize(PlaceholderBindings &Ibindings,
ElemKind backendElemKind,
quantization::Schema schema,
bool enableRowwiseQuantization) {
// Lower everything for profiling in a cloned PF, keeping track of lowered
// info in loweredMap, which is then used when generating QI.
Function *PF = IF->clone("profile");
LoweredInfoMap loweredMapForProf;
lower(PF, &loweredMapForProf, IEE.getBackend());
quantization::QuantizationConfiguration quantConfig{
profileAndGetNodeQuantizationInfo(Ibindings, IEE, PF, loweredMapForProf,
schema)};
profileAndGetNodeQuantizationInfo(Ibindings, IEE, IF, schema)};
quantConfig.enableRowwise = enableRowwiseQuantization;
quantConfig.schema = schema;
quantConfig.assertAllNodesQuantized = true;
Expand All @@ -98,16 +98,16 @@ static void profileAndQuantize(PlaceholderBindings &Ibindings,
// Lower only as the backends prefer for actually quantizing.
LoweredInfoMap loweredMapForQuant;
lower(IF, &loweredMapForQuant, IEE.getBackend());
IF = quantization::quantizeFunction(IF, quantConfig, *IEE.getBackend(),
loweredMapForQuant);
quantization::quantizeFunction(IF, quantConfig, *IEE.getBackend(),
loweredMapForQuant);
}
if (isQuantizedElemKind(backendElemKind)) {
quantConfig.precision = backendElemKind;
// Lower only as the backends prefer for actually quantizing.
LoweredInfoMap loweredMapForQuant;
lower(BF, &loweredMapForQuant, BEE.getBackend());
BF = quantization::quantizeFunction(BF, quantConfig, *BEE.getBackend(),
loweredMapForQuant);
quantization::quantizeFunction(BF, quantConfig, *BEE.getBackend(),
loweredMapForQuant);
}
}

Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/GraphTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ TEST(Graph, QuantizationProfileNodes) {

// Simulate actual usage.
::optimize(F, CompilationMode::Infer);
F = ::glow::profileQuantization(bindings, F);
::glow::profileQuantization(bindings, F);
auto backend = MockBackend();
lower(F, /* loweredMap */ nullptr, &backend);
::optimize(F, CompilationMode::Infer);
Expand Down
16 changes: 8 additions & 8 deletions tests/unittests/MLTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1061,7 +1061,7 @@ TEST_P(InterpreterAndCPU, convNetForImageRecognition) {
lower(PF, &loweredMapForProf);

// Profiling:
PF = glow::profileQuantization(bindings, PF);
glow::profileQuantization(bindings, PF);
EE.compile(CompilationMode::Infer, PF);
runBatch(EE, bindings, 100, sampleCounter, {input}, {&images});

Expand All @@ -1083,10 +1083,10 @@ TEST_P(InterpreterAndCPU, convNetForImageRecognition) {
// Build the new quantized graph.
LoweredInfoMap loweredMapForQuant;
lower(F, &loweredMapForQuant, EE.getBackend());
Function *QP = quantization::quantizeFunction(
F, quantConfig, *EE.getBackend(), loweredMapForQuant, doNotQuantizeKinds);
quantization::quantizeFunction(F, quantConfig, *EE.getBackend(),
loweredMapForQuant, doNotQuantizeKinds);

EE.compile(CompilationMode::Infer, QP);
EE.compile(CompilationMode::Infer, F);

// Generate the images used for testing.
Tensor testImages(ElemKind::FloatTy, {batchSize, 8, 8, 1});
Expand Down Expand Up @@ -1183,7 +1183,7 @@ TEST_P(InterpreterAndCPU, testFindPixelRegression) {
lower(PF, &loweredMapForProf);

// Profile the fully lowered 'F', 'PF'.
PF = glow::profileQuantization(bindings, PF);
glow::profileQuantization(bindings, PF);
EE.compile(CompilationMode::Infer, PF);

// Run the graph to capture the profile.
Expand All @@ -1203,10 +1203,10 @@ TEST_P(InterpreterAndCPU, testFindPixelRegression) {
// Build the new quantized graph.
LoweredInfoMap loweredMapForQuant;
lower(F, &loweredMapForQuant, EE.getBackend());
Function *QP = quantization::quantizeFunction(
F, quantConfig, *EE.getBackend(), loweredMapForQuant);
quantization::quantizeFunction(F, quantConfig, *EE.getBackend(),
loweredMapForQuant);

EE.compile(CompilationMode::Infer, QP);
EE.compile(CompilationMode::Infer, F);

// Generate the images used for testing.
Tensor testImages(ElemKind::FloatTy, {batchSize, 10, 10, 1});
Expand Down
Loading

0 comments on commit 790da69

Please sign in to comment.