Skip to content

Commit

Permalink
Remove cloning of functions for quantization and profiling (pytorch#2793
Browse files Browse the repository at this point in the history
)

*Description*: As discussed in pytorch#2700, this removes cloning the function for quantization and profiling. If the caller wants the original function after quantization/profiling it should clone the function prior to calling `quantizeFunction()`/`profileQuantization()`.

*Testing*: All tests still pass.
  • Loading branch information
jfix71 authored Apr 25, 2019
1 parent bbaba50 commit 790da69
Show file tree
Hide file tree
Showing 12 changed files with 92 additions and 159 deletions.
11 changes: 3 additions & 8 deletions examples/fr2en.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ struct Model {
::lower(F_, &loweredMap_);

// Instrument the graph to capture profiles for nodes' outputs.
F_ = glow::profileQuantization(bindings, F_);
glow::profileQuantization(bindings, F_);
}

// Load the quantization profile and transform the graph.
Expand All @@ -174,13 +174,8 @@ struct Model {
deserializeFromYaml(loadProfileFileOpt)};

// Quantize the graph based on the captured profile.
auto *Q = quantization::quantizeFunction(F_, quantConfig,
*EE_.getBackend(), loweredMap_);

// Erase the original function so that the redundant variables that are
// only referenced by the original function will be removed.
Q->getParent()->eraseFunction(F_);
F_ = Q;
quantization::quantizeFunction(F_, quantConfig, *EE_.getBackend(),
loweredMap_);
}

// Do not create constants if we're profiling; the newly allocated histogram
Expand Down
7 changes: 2 additions & 5 deletions include/glow/Optimizer/Optimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,8 @@ void convertPlaceholdersToConstants(Function *F,

/// Instrument function \p F by inserting quantization profile nodes for
/// capturing stats for quantization. The nodes will refer to tensors allocated
/// in in context \p bindings. The new quantized function is called \p
/// newFuncName. If no name is given the method will generate a name. \returns
/// a new function with the added quantization nodes.
Function *profileQuantization(PlaceholderBindings &bindings, Function *F,
llvm::StringRef newFuncName = "");
/// in context \p bindings.
void profileQuantization(PlaceholderBindings &bindings, Function *F);

/// Helper to generate and optimize IR from given Function \p F. \p
/// shouldShareBuffers signifies whether to use the share buffers optimization.
Expand Down
20 changes: 8 additions & 12 deletions include/glow/Quantization/Quantization.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,15 @@ std::vector<NodeQuantizationInfo> generateNodeQuantizationInfos(
const LoweredInfoMap &loweredMap = {}, Schema schema = Schema::Asymmetric,
ElemKind quantizationPrecision = ElemKind::Int8QTy);

/// Quantizes the function \p F into a new unoptimized partially quantized
/// function based on configuration from \p quantConfig. This method converts to
/// integer as many nodes as permitted by the backend \p B.
/// Quantizes the function \p F into an unoptimized partially quantized function
/// based on configuration from \p quantConfig. This method converts to integer
/// as many nodes as permitted by the backend \p B. \p loweredMap contains info
/// about what nodes were lowered from what, to be used during quantization.
/// \p doNotQuantizeKinds lists kinds to not quantize, even if a profile was
/// gathered for them and the backend supports the quantized operation. This
/// method clones original function \p F and caller is responsible for cleaning
/// up/erasing original function \p F if needed. \returns a new quantized
/// function.
Function *quantizeFunction(Function *F,
const QuantizationConfiguration &quantConfig,
const Backend &B,
const LoweredInfoMap &loweredMap = {},
const KindSet &doNotQuantizeKinds = {});
/// gathered for them and the backend supports the quantized operation.
void quantizeFunction(Function *F, const QuantizationConfiguration &quantConfig,
const Backend &B, const LoweredInfoMap &loweredMap = {},
const KindSet &doNotQuantizeKinds = {});

} // namespace quantization
} // namespace glow
Expand Down
10 changes: 3 additions & 7 deletions lib/Onnxifi/InlineOnnxifi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,20 +61,16 @@ InlineGraph::initGraph(const void *onnxModel, size_t onnxModelSize,
if (quantizationStep_ == OnnxifiQuantizationStep::Profile) {
lower(function_, &loweredMap_, executionEngine_.getBackend());
PlaceholderBindings dummyCtx;
function_ = profileQuantization(dummyCtx, function_);
profileQuantization(dummyCtx, function_);
}

// -- Quantize --
if (quantizationStep_ == OnnxifiQuantizationStep::Quantize) {
quantization::QuantizationConfiguration quantConfig{
deserializeFromYaml(getProfileFile(modelHash_))};
quantConfig.schema = quantization::Schema::Symmetric;
quantConfig.newFuncName = function_->getName();
function_->setName("old");
auto *Q = quantization::quantizeFunction(
function_, quantConfig, *executionEngine_.getBackend(), loweredMap_);
Q->getParent()->eraseFunction(function_);
function_ = Q;
quantization::quantizeFunction(function_, quantConfig,
*executionEngine_.getBackend(), loweredMap_);
}

executionEngine_.compile(CompilationMode::Infer, function_);
Expand Down
23 changes: 5 additions & 18 deletions lib/Optimizer/Quantization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,13 @@

using namespace glow;

Function *glow::profileQuantization(PlaceholderBindings &bindings, Function *F,
llvm::StringRef newFuncName) {
// Create a new name for the differentiated function, if none is given.
std::string tmpName;
if (newFuncName.empty()) {
tmpName = std::string(F->getName()) + "_profile";
newFuncName = tmpName;
}

// Clone the function.
Function *G = F->clone(newFuncName);

void glow::profileQuantization(PlaceholderBindings &bindings, Function *F) {
// Iterate over all nodes in the graph and insert QuantizationProfile nodes
// to observe tensor values from every node's output.
std::unordered_set<NodeValue> nodesToInstrument;

// Add Quantization Profile node to all of the floating point outputs.
for (auto &node : G->getNodes()) {
for (auto &node : F->getNodes()) {
for (unsigned i = 0, e = node.getNumResults(); i < e; ++i) {
if (node.getElementType(i) != ElemKind::FloatTy) {
continue;
Expand All @@ -50,25 +39,23 @@ Function *glow::profileQuantization(PlaceholderBindings &bindings, Function *F,
}

// Add Quantization Profile node to all floating point vars.
for (const auto &var : G->getParent()->getConstants()) {
for (const auto &var : F->getParent()->getConstants()) {
if (var->getOutput().getElementType() != ElemKind::FloatTy) {
continue;
}
nodesToInstrument.insert(var->getOutput());
}

// Add Quantization Profile node to all floating point placeholders.
for (const auto &PH : G->getParent()->getPlaceholders()) {
for (const auto &PH : F->getParent()->getPlaceholders()) {
if (PH->getOutput().getElementType() != ElemKind::FloatTy) {
continue;
}
nodesToInstrument.insert(PH->getOutput());
}

for (const auto &NV : nodesToInstrument) {
G->createQuantizationProfile(bindings,
F->createQuantizationProfile(bindings,
"QP_" + NV.getNode()->getName().str(), NV);
}

return G;
}
14 changes: 4 additions & 10 deletions lib/Quantization/Quantization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -898,26 +898,20 @@ generateNodeQuantizationInfos(PlaceholderBindings &bindings, const Function *F,
return quantizationInfos;
}

Function *quantizeFunction(Function *F,
const QuantizationConfiguration &quantConfig,
const Backend &B, const LoweredInfoMap &loweredMap,
const KindSet &doNotQuantizeKinds) {
void quantizeFunction(Function *F, const QuantizationConfiguration &quantConfig,
const Backend &B, const LoweredInfoMap &loweredMap,
const KindSet &doNotQuantizeKinds) {
assert((quantConfig.precision == ElemKind::Int8QTy ||
quantConfig.precision == ElemKind::Int16QTy) &&
"Only Int8 and Int16 quantization supported");
Function *G = F->clone(quantConfig.newFuncName.empty()
? F->getName().str() + "_quantized"
: quantConfig.newFuncName);

FunctionQuantizer quantizer(*G, B, quantConfig.schema, quantConfig.infos,
FunctionQuantizer quantizer(*F, B, quantConfig.schema, quantConfig.infos,
quantConfig.precision, doNotQuantizeKinds,
loweredMap, quantConfig.assertAllNodesQuantized);
quantizer.convert();
if (quantConfig.enableRowwise) {
quantizer.enableRowwise();
}

return G;
}

} // namespace quantization
Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/BackendTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ TEST(Interpreter, profileQuantizationForANetwork) {
O = F->createRELU("relu", O);
O = F->createRegression("reg", O, Ex);

F = ::glow::profileQuantization(ctx, F);
::glow::profileQuantization(ctx, F);

ctx.allocate(A);
ctx.allocate(Ex);
Expand Down
30 changes: 15 additions & 15 deletions tests/unittests/BackendTestUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,16 @@ static Placeholder *createQuantizedPlaceholder(Module &mod,
/// Clone, profile, and run \p origF given the \p bindings and \p EE. \returns
/// the quantization parameters from the profile, generated from a lowered
/// clone of \p origF using the specified \p schema.
static std::vector<NodeQuantizationInfo> profileAndGetNodeQuantizationInfo(
PlaceholderBindings &bindings, ExecutionEngine &EE, Function *origF,
const LoweredInfoMap &loweredMap, quantization::Schema schema) {
Function *profileF = glow::profileQuantization(bindings, origF);
static std::vector<NodeQuantizationInfo>
profileAndGetNodeQuantizationInfo(PlaceholderBindings &bindings,
ExecutionEngine &EE, Function *origF,
quantization::Schema schema) {
// Lower everything for profiling in a cloned PF, keeping track of lowered
// info in loweredMap, which is then used when generating QI.
Function *profileF = origF->clone("profile");
LoweredInfoMap loweredMap;
lower(profileF, &loweredMap, EE.getBackend());
glow::profileQuantization(bindings, profileF);
EE.compile(CompilationMode::Infer, profileF);

EE.run(bindings);
Expand All @@ -81,14 +87,8 @@ static void profileAndQuantize(PlaceholderBindings &Ibindings,
ElemKind backendElemKind,
quantization::Schema schema,
bool enableRowwiseQuantization) {
// Lower everything for profiling in a cloned PF, keeping track of lowered
// info in loweredMap, which is then used when generating QI.
Function *PF = IF->clone("profile");
LoweredInfoMap loweredMapForProf;
lower(PF, &loweredMapForProf, IEE.getBackend());
quantization::QuantizationConfiguration quantConfig{
profileAndGetNodeQuantizationInfo(Ibindings, IEE, PF, loweredMapForProf,
schema)};
profileAndGetNodeQuantizationInfo(Ibindings, IEE, IF, schema)};
quantConfig.enableRowwise = enableRowwiseQuantization;
quantConfig.schema = schema;
quantConfig.assertAllNodesQuantized = true;
Expand All @@ -98,16 +98,16 @@ static void profileAndQuantize(PlaceholderBindings &Ibindings,
// Lower only as the backends prefer for actually quantizing.
LoweredInfoMap loweredMapForQuant;
lower(IF, &loweredMapForQuant, IEE.getBackend());
IF = quantization::quantizeFunction(IF, quantConfig, *IEE.getBackend(),
loweredMapForQuant);
quantization::quantizeFunction(IF, quantConfig, *IEE.getBackend(),
loweredMapForQuant);
}
if (isQuantizedElemKind(backendElemKind)) {
quantConfig.precision = backendElemKind;
// Lower only as the backends prefer for actually quantizing.
LoweredInfoMap loweredMapForQuant;
lower(BF, &loweredMapForQuant, BEE.getBackend());
BF = quantization::quantizeFunction(BF, quantConfig, *BEE.getBackend(),
loweredMapForQuant);
quantization::quantizeFunction(BF, quantConfig, *BEE.getBackend(),
loweredMapForQuant);
}
}

Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/GraphTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ TEST(Graph, QuantizationProfileNodes) {

// Simulate actual usage.
::optimize(F, CompilationMode::Infer);
F = ::glow::profileQuantization(bindings, F);
::glow::profileQuantization(bindings, F);
auto backend = MockBackend();
lower(F, /* loweredMap */ nullptr, &backend);
::optimize(F, CompilationMode::Infer);
Expand Down
16 changes: 8 additions & 8 deletions tests/unittests/MLTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1061,7 +1061,7 @@ TEST_P(InterpreterAndCPU, convNetForImageRecognition) {
lower(PF, &loweredMapForProf);

// Profiling:
PF = glow::profileQuantization(bindings, PF);
glow::profileQuantization(bindings, PF);
EE.compile(CompilationMode::Infer, PF);
runBatch(EE, bindings, 100, sampleCounter, {input}, {&images});

Expand All @@ -1083,10 +1083,10 @@ TEST_P(InterpreterAndCPU, convNetForImageRecognition) {
// Build the new quantized graph.
LoweredInfoMap loweredMapForQuant;
lower(F, &loweredMapForQuant, EE.getBackend());
Function *QP = quantization::quantizeFunction(
F, quantConfig, *EE.getBackend(), loweredMapForQuant, doNotQuantizeKinds);
quantization::quantizeFunction(F, quantConfig, *EE.getBackend(),
loweredMapForQuant, doNotQuantizeKinds);

EE.compile(CompilationMode::Infer, QP);
EE.compile(CompilationMode::Infer, F);

// Generate the images used for testing.
Tensor testImages(ElemKind::FloatTy, {batchSize, 8, 8, 1});
Expand Down Expand Up @@ -1183,7 +1183,7 @@ TEST_P(InterpreterAndCPU, testFindPixelRegression) {
lower(PF, &loweredMapForProf);

// Profile the fully lowered 'F', 'PF'.
PF = glow::profileQuantization(bindings, PF);
glow::profileQuantization(bindings, PF);
EE.compile(CompilationMode::Infer, PF);

// Run the graph to capture the profile.
Expand All @@ -1203,10 +1203,10 @@ TEST_P(InterpreterAndCPU, testFindPixelRegression) {
// Build the new quantized graph.
LoweredInfoMap loweredMapForQuant;
lower(F, &loweredMapForQuant, EE.getBackend());
Function *QP = quantization::quantizeFunction(
F, quantConfig, *EE.getBackend(), loweredMapForQuant);
quantization::quantizeFunction(F, quantConfig, *EE.getBackend(),
loweredMapForQuant);

EE.compile(CompilationMode::Infer, QP);
EE.compile(CompilationMode::Infer, F);

// Generate the images used for testing.
Tensor testImages(ElemKind::FloatTy, {batchSize, 10, 10, 1});
Expand Down
Loading

0 comments on commit 790da69

Please sign in to comment.