sort benchmarks in python code
李寅 committed Jul 12, 2018
1 parent 3d608e4 commit 21d65fb
Showing 10 changed files with 266 additions and 75 deletions.
143 changes: 85 additions & 58 deletions aibench/benchmark/benchmark.cc
@@ -75,38 +75,42 @@ Benchmark::Benchmark(BaseExecutor *executor,
       input_shapes_(input_shapes),
       output_names_(output_names),
       output_shapes_(output_shapes) {
-  if (input_names.size() != input_shapes.size()
-      || (input_files.size() != input_shapes.size() && input_files.size() > 0)
-      || output_names.size() != output_shapes.size()) {
-    printf("size of input_names(%d), input_files(%d) and input_shapes(%d) "
-           "should be equal. sizeof output_names(%d) and output_shapes(%d) "
-           "should be equal.\n",
-           static_cast<int>(input_names.size()),
-           static_cast<int>(input_files.size()),
-           static_cast<int>(input_shapes.size()),
-           static_cast<int>(output_names.size()),
-           static_cast<int>(output_shapes.size()));
+  if (input_names.size() != input_shapes.size() ||
+      (input_files.size() != input_shapes.size() && input_files.size() > 0) ||
+      output_names.size() != output_shapes.size()) {
+    printf(
+        "size of input_names(%d), input_files(%d) and input_shapes(%d) "
+        "should be equal. sizeof output_names(%d) and output_shapes(%d) "
+        "should be equal.\n",
+        static_cast<int>(input_names.size()),
+        static_cast<int>(input_files.size()),
+        static_cast<int>(input_shapes.size()),
+        static_cast<int>(output_names.size()),
+        static_cast<int>(output_shapes.size()));
     abort();
   }
   Register();
 }
 
 // Run all benchmarks filtered by model_name
-Status Benchmark::Run(const char *model_name, const char *framework,
-                      const char *runtime, int run_interval, int num_threads) {
+Status Benchmark::Run(const char *model_name,
+                      const char *framework,
+                      const char *runtime,
+                      int run_interval,
+                      int num_threads) {
   if (!all_benchmarks) return SUCCESS;
 
   // sort by model name, framework and runtime
   // the compare function tends to shuffle benchmarks by runtime
   std::sort(all_benchmarks->begin(), all_benchmarks->end(),
             [](const Benchmark *lhs, const Benchmark *rhs) {
-              return lhs->model_name_ < rhs->model_name_
-                  || (lhs->model_name_ == rhs->model_name_
-                      && (lhs->executor_->GetFramework()
-                          < rhs->executor_->GetFramework() || (
-                          lhs->executor_->GetFramework()
-                          == rhs->executor_->GetFramework()
-                          && lhs->executor_->GetRuntime() != aibench::CPU)));
+              return lhs->model_name_ < rhs->model_name_ ||
+                     (lhs->model_name_ == rhs->model_name_ &&
+                      (lhs->executor_->GetFramework() <
+                           rhs->executor_->GetFramework() ||
+                       (lhs->executor_->GetFramework() ==
+                            rhs->executor_->GetFramework() &&
+                        lhs->executor_->GetRuntime() != aibench::CPU)));
             });
 
   // Internal perf regression tools depends on the output formatting,
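The comparator above sorts by model name, then framework; its final tiebreak reads only the left-hand operand's runtime, so within an equal model/framework group it merely tends to push non-CPU runtimes ahead of CPU ones, as the comment says. A rough Python sketch of the intended grouping, expressed as a sort key instead of a comparator (the Benchmark record and runtime constants here are illustrative, not from this repo):

# Illustrative sketch only: group benchmarks by model name, then framework,
# with non-CPU runtimes ordered ahead of CPU within each group.
from collections import namedtuple

CPU, GPU, DSP = 0, 1, 2  # stand-ins for the aibench runtime enum
Benchmark = namedtuple("Benchmark", ["model_name", "framework", "runtime"])

benchmarks = [
    Benchmark("MobileNetV1", 0, CPU),
    Benchmark("InceptionV3", 1, DSP),
    Benchmark("MobileNetV1", 0, GPU),
]

# runtime == CPU yields False (0) for non-CPU entries, sorting them first;
# unlike the C++ lambda, this key is a strict ordering, so no shuffling.
benchmarks.sort(key=lambda b: (b.model_name, b.framework, b.runtime == CPU))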
@@ -122,25 +126,25 @@ Status Benchmark::Run(const char *model_name, const char *framework,
     if (strcmp(runtime, "all") != 0 &&
         ParseRuntime(runtime) != b->executor_->GetRuntime())
       continue;
-    double init_seconds, run_seconds;
-    printf("benchmarking:%s,%d,%d\n",
-           b->model_name_.c_str(),
-           b->executor_->GetFramework(),
-           b->executor_->GetRuntime());
-    Status status = b->Run(&init_seconds, &run_seconds, num_threads);
-
-    // sleep run_interval seconds to cool off the target
-    printf("sleep %d\n", run_interval);
-    sleep(static_cast<uint32_t>(run_interval));
-
+    double init_ms, run_ms;
+    printf("benchmarking: %s,%d,%d\n", b->model_name_.c_str(),
+           b->executor_->GetFramework(), b->executor_->GetRuntime());
+    Status status = b->Run(&init_ms, &run_ms, num_threads);
     if (status != SUCCESS) {
       res = status;
       printf("benchmark failed: %s,%d,%d\n", b->model_name_.c_str(),
              b->executor_->GetFramework(), b->executor_->GetRuntime());
       continue;
     }
     // model_name,framework,runtime,init time,inference time
-    printf("benchmark:%s,%d,%d,%.3f,%.3f\n",
-           b->model_name_.c_str(),
-           b->executor_->GetFramework(),
-           b->executor_->GetRuntime(),
-           init_seconds * 1000,
-           run_seconds * 1000);
+    // sleep run_interval seconds to cool off the target
+    sleep(static_cast<uint32_t>(run_interval));
+    printf("benchmark: %s,%d,%d,%.3f,%.3f\n", b->model_name_.c_str(),
+           b->executor_->GetFramework(), b->executor_->GetRuntime(), init_ms,
+           run_ms);
   }
   return res;
 }
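The context line above notes that internal perf regression tools depend on this output formatting. A minimal Python sketch of such a consumer, assuming only the "benchmark: %s,%d,%d,%.3f,%.3f" line format printed above:

# Sketch of a parser for the result lines printed above, format:
# "benchmark: <model_name>,<framework>,<runtime>,<init_ms>,<run_ms>"
def parse_benchmark_line(line):
    if not line.startswith("benchmark:"):
        return None  # skips "benchmarking: ..." and "benchmark failed: ..."
    model_name, framework, runtime, init_ms, run_ms = \
        line[len("benchmark:"):].strip().split(",")
    return (model_name, int(framework), int(runtime),
            float(init_ms), float(run_ms))

print(parse_benchmark_line("benchmark: MobileNetV1,0,0,12.345,6.789"))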
@@ -150,23 +154,28 @@ void Benchmark::Register() {
   all_benchmarks->push_back(this);
 }
 
-Status Benchmark::Run(double *init_seconds, double *run_seconds,
-                      int num_threads) {
-  static const int64_t kMinIters = 10;
-  static const int64_t kMaxIters = 1000000000;
-  static const double kMinTime = 2;
-  int64_t iters = kMinIters;
+Status Benchmark::Run(double *init_ms, double *run_ms, int num_threads) {
+  static const int64_t kMinIters = 5;
+  static const int64_t kMaxIters = 20;
+  static const double kMinTime = 2000000;  // microseconds
+  static const float quantile = 0.8;
   int64_t start_time, end_time;
   Status status;
   // Init the target's environment
   status = executor_->Init(model_file_.c_str(), num_threads);
-  if (status != SUCCESS) return status;
+  if (status != SUCCESS) {
+    executor_->Finish();
+    return status;
+  }
   // prepare
   start_time = NowMicros();
   status = executor_->Prepare(model_file_.c_str());
   end_time = NowMicros();
-  *init_seconds = (end_time - start_time) * 1e-6;
-  if (status != SUCCESS) return status;
+  *init_ms = (end_time - start_time) * 1e-3;
+  if (status != SUCCESS) {
+    executor_->Finish();
+    return status;
+  }
   // warm-up
   std::map<std::string, BaseTensor> inputs;
   std::map<std::string, BaseTensor> outputs;
@@ -202,28 +211,46 @@ Status Benchmark::Run(double *init_seconds, double *run_seconds,
                                  std::default_delete<float[]>());
     outputs[output_names_[i]] = BaseTensor(output_shapes_[i], buffer_out);
   }
-  for (int i = 0; i < 5; ++i) {
+
+  for (int i = 0; i < 2; ++i) {
     status = executor_->Run(inputs, &outputs);
   }
-  if (status != SUCCESS) return status;
-  while (true) {
+  if (status != SUCCESS) {
+    executor_->Finish();
+    return status;
+  }
+
+  std::vector<int64_t> durations;
+  int64_t total_duration = 0;
+  size_t benchmark_iters = 0;
+
+  for (int i = 0; i < kMinIters || (total_duration < kMinTime && i < kMaxIters);
+       ++i) {
     start_time = NowMicros();
-    for (int i = 0; i < iters; ++i) {
-      executor_->Run(inputs, &outputs);
-    }
+    status = executor_->Run(inputs, &outputs);
     end_time = NowMicros();
-    const double seconds = (end_time - start_time) * 1e-6;
-    if (seconds >= kMinTime || iters >= kMaxIters) {
-      *run_seconds = seconds / iters;
-      return SUCCESS;
+    durations.push_back(end_time - start_time);
+    total_duration += durations.back();
+    if (status != SUCCESS) {
+      executor_->Finish();
+      return status;
     }
-
-    // Update number of iterations.
-    // Overshoot by 100% in an attempt to succeed the next time.
-    double multiplier = 2.0 * kMinTime / std::max(seconds, 1e-9);
-    iters = std::min<int64_t>(multiplier * iters, kMaxIters);
+    ++benchmark_iters;
   }
+
+  std::sort(durations.begin(), durations.end());
+
+  size_t valid_iters = std::max(
+      static_cast<size_t>(1), static_cast<size_t>(benchmark_iters * quantile));
+  size_t start_iter = (benchmark_iters - valid_iters) / 2;
+  valid_iters = std::min(valid_iters, benchmark_iters - start_iter);
+  total_duration =
+      std::accumulate(durations.begin() + start_iter,
+                      durations.begin() + (start_iter + valid_iters), 0);
+
+  *run_ms = total_duration * 1e-3 / valid_iters;
+  executor_->Finish();
+  return SUCCESS;
 }
 
 int64_t NowMicros() {
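The rewritten Benchmark::Run drops the old strategy of growing `iters` until a single timed batch exceeded two seconds. It now times every run individually for at least kMinIters = 5 and at most kMaxIters = 20 iterations (continuing past 5 only while the accumulated time is below kMinTime = 2,000,000 microseconds), then sorts the durations and averages the middle 80%. A Python sketch of the same trimmed-mean statistic, with made-up durations:

# Sketch of the trimmed mean computed above: sort per-run durations
# (microseconds), keep the middle `quantile` fraction, average, report ms.
# The duration values below are made up.
QUANTILE = 0.8

def run_ms(durations_us):
    durations = sorted(durations_us)
    n = len(durations)
    valid = max(1, int(n * QUANTILE))
    start = (n - valid) // 2
    valid = min(valid, n - start)
    return sum(durations[start:start + valid]) * 1e-3 / valid

# The two tail outliers (30 ms and 120 ms) are trimmed away, leaving 50.0.
print(run_ms([50000, 50000, 30000, 50000, 50000, 120000, 50000, 50000]))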
1 change: 0 additions & 1 deletion aibench/executors/BUILD
@@ -106,7 +106,6 @@ cc_library(
     hdrs = [
         "tflite/tflite_executor.h",
     ],
-
     deps = [
         ":base_executor",
     ] + if_android_armv7([
28 changes: 18 additions & 10 deletions aibench/executors/snpe/snpe_executor.cc
@@ -71,13 +71,12 @@ Status ProcessInput(zdl::SNPE::SNPE *snpe,
     std::cerr << "inputs size not matched" << std::endl;
     return Status::RUNTIME_ERROR;
   }
-  std::unique_ptr<zdl::DlSystem::ITensor> input_tensor;
   for (size_t i = 0; i < input_tensor_names.size(); i++) {
     std::string input_name(input_tensor_names.at(i));
     const auto &input_shape_opt =
         snpe->getInputDimensions(input_tensor_names.at(i));
     const auto &input_shape = *input_shape_opt;
-    input_tensor =
+    std::unique_ptr<zdl::DlSystem::ITensor> input_tensor =
         zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(input_shape);
     size_t input_size = inputs.at(input_name).size();
 
@@ -136,25 +135,34 @@ Status SnpeExecutor::Prepare(const char *model_name) {
 
 Status SnpeExecutor::Run(const std::map<std::string, BaseTensor> &inputs,
                          std::map<std::string, BaseTensor> *outputs) {
-  Status status;
+  Status status = SUCCESS;
+
+  zdl::DlSystem::TensorMap input_tensor_map;
+  zdl::DlSystem::TensorMap output_tensor_map;
+
   // step1: prepare inputs
-  input_tensor_map_.clear();
-  status = ProcessInput(snpe_.get(), inputs, &input_tensor_map_);
+  status = ProcessInput(snpe_.get(), inputs, &input_tensor_map);
   if (status != Status::SUCCESS) return status;
 
   // step2: execute
-  output_tensor_map_.clear();
-  snpe_.get()->execute(input_tensor_map_, output_tensor_map_);
+  snpe_.get()->execute(input_tensor_map, output_tensor_map);
 
   // step3: process output
-  status = ProcessOutput(output_tensor_map_, outputs);
+  status = ProcessOutput(output_tensor_map, outputs);
+
+  auto tensor_names = input_tensor_map.getTensorNames();
+  for (size_t i = 0; i < tensor_names.size(); ++i) {
+    std::string input_name(tensor_names.at(i));
+    zdl::DlSystem::ITensor* input_tensor =
+        input_tensor_map.getTensor(input_name.c_str());
+    delete input_tensor;
+  }
+
   return status;
 }
 
 void SnpeExecutor::Finish() {
   if (snpe_ != nullptr) snpe_.reset();
-  input_tensor_map_.clear();
-  output_tensor_map_.clear();
 }
 
 }  // namespace aibench
2 changes: 0 additions & 2 deletions aibench/executors/snpe/snpe_executor.h
@@ -38,8 +38,6 @@ class SnpeExecutor : public BaseExecutor {
   virtual void Finish();
  private:
   std::unique_ptr<zdl::SNPE::SNPE> snpe_;
-  zdl::DlSystem::TensorMap input_tensor_map_;
-  zdl::DlSystem::TensorMap output_tensor_map_;
 };
 
 }  // namespace aibench
3 changes: 3 additions & 0 deletions tools/bazel.rc
@@ -1,5 +1,8 @@
 build --verbose_failures
 build --copt=-std=c++11
 build --copt=-O3
+build --copt=-ffast-math
+build --copt=-Ofast
+build --strategy=CppCompile=standalone
 
 # By default, we don't distinct target and host platfroms.
1 change: 1 addition & 0 deletions tools/benchmark.py
@@ -185,6 +185,7 @@ def main(unused_args):
     all_prepare = []
     all_run_avg = []
     for target_abi in target_abis:
+        print("Prepare to run models on %s" % target_abi)
         if target_abi not in abi_types:
             print("Not supported abi: %s" % target_abi)
             continue
7 changes: 7 additions & 0 deletions tools/google-format.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+clang-format-3.9 \
+-style="{BasedOnStyle: google, \
+DerivePointerAlignment: false, \
+PointerAlignment: Right, \
+BinPackParameters: false}" -i $1
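Presumably the script is invoked once per source file, e.g. "tools/google-format.sh aibench/benchmark/benchmark.cc"; note that the unquoted $1 means paths containing spaces would need quoting.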
22 changes: 22 additions & 0 deletions tools/model_list.py
@@ -0,0 +1,22 @@
+# Copyright 2018 Xiaomi, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+BENCHMARK_MODELS = (
+    "MobileNetV1",
+    "MobileNetV2",
+    "SqueezeNetV11",
+    "InceptionV3",
+    "VGG16"
+)
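Per the commit title, benchmark ordering now also lives in the Python tooling. A minimal sketch of how a fixed tuple like this can impose a stable run order (run_benchmark is a hypothetical callback, not a function from this repo):

# Hypothetical consumer of BENCHMARK_MODELS: iterating the tuple yields a
# stable, canonical model order for running and reporting benchmarks.
from model_list import BENCHMARK_MODELS

def run_all(run_benchmark):
    for model_name in BENCHMARK_MODELS:
        run_benchmark(model_name)

run_all(lambda name: print("would benchmark %s" % name))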
