From 21d65fbe795a062f2fd469335e9e2bfa4a647a23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=8E=E5=AF=85?= <liyin@xiaomi.com>
Date: Wed, 4 Jul 2018 14:24:26 +0800
Subject: [PATCH] Sort benchmark runs and improve timing stability

Sort benchmark runs by runtime in the Python tools and sleep between
runs so the device can cool down. Time a bounded number of iterations
per model and report the trimmed-mean latency in milliseconds. Make the
SNPE tensor maps per-run locals and free input tensors after each run.
Also add optimization flags to the build, a shared model list, and
tools/power.sh to pin devices to performance mode before benchmarking.
---
 aibench/benchmark/benchmark.cc          | 143 ++++++++++++++----------
 aibench/executors/BUILD                 |   1 -
 aibench/executors/snpe/snpe_executor.cc |  28 +++--
 aibench/executors/snpe/snpe_executor.h  |   2 -
 tools/bazel.rc                          |   3 +
 tools/benchmark.py                      |   1 +
 tools/google-format.sh                  |   7 ++
 tools/model_list.py                     |  22 ++++
 tools/power.sh                          | 111 ++++++++++++++++++
 tools/sh_commands.py                    |  23 +++-
 10 files changed, 266 insertions(+), 75 deletions(-)
 create mode 100755 tools/google-format.sh
 create mode 100644 tools/model_list.py
 create mode 100755 tools/power.sh

diff --git a/aibench/benchmark/benchmark.cc b/aibench/benchmark/benchmark.cc
index b96061c..1424444 100644
--- a/aibench/benchmark/benchmark.cc
+++ b/aibench/benchmark/benchmark.cc
@@ -75,38 +75,42 @@ Benchmark::Benchmark(BaseExecutor *executor,
       input_shapes_(input_shapes),
       output_names_(output_names),
       output_shapes_(output_shapes) {
-  if (input_names.size() != input_shapes.size()
-      || (input_files.size() != input_shapes.size() && input_files.size() > 0)
-      || output_names.size() != output_shapes.size()) {
-    printf("size of input_names(%d), input_files(%d) and input_shapes(%d) "
-               "should be equal. sizeof output_names(%d) and output_shapes(%d) "
-               "should be equal.\n",
-           static_cast<int>(input_names.size()),
-           static_cast<int>(input_files.size()),
-           static_cast<int>(input_shapes.size()),
-           static_cast<int>(output_names.size()),
-           static_cast<int>(output_shapes.size()));
+  if (input_names.size() != input_shapes.size() ||
+      (input_files.size() != input_shapes.size() && input_files.size() > 0) ||
+      output_names.size() != output_shapes.size()) {
+    printf(
+        "sizes of input_names(%d), input_files(%d) and input_shapes(%d) "
+        "should be equal; sizes of output_names(%d) and output_shapes(%d) "
+        "should be equal.\n",
+        static_cast<int>(input_names.size()),
+        static_cast<int>(input_files.size()),
+        static_cast<int>(input_shapes.size()),
+        static_cast<int>(output_names.size()),
+        static_cast<int>(output_shapes.size()));
     abort();
   }
   Register();
 }
 
 // Run all benchmarks filtered by model_name
-Status Benchmark::Run(const char *model_name, const char *framework,
-                      const char *runtime, int run_interval, int num_threads) {
+Status Benchmark::Run(const char *model_name,
+                      const char *framework,
+                      const char *runtime,
+                      int run_interval,
+                      int num_threads) {
   if (!all_benchmarks) return SUCCESS;
 
   // sort by model name, framework and runtime
   // the compare function tends to shuffle benchmarks by runtime
   std::sort(all_benchmarks->begin(), all_benchmarks->end(),
             [](const Benchmark *lhs, const Benchmark *rhs) {
-              return lhs->model_name_ < rhs->model_name_
-                || (lhs->model_name_ == rhs->model_name_
-                  && (lhs->executor_->GetFramework()
-                    < rhs->executor_->GetFramework() || (
-                    lhs->executor_->GetFramework()
-                      == rhs->executor_->GetFramework()
-                      && lhs->executor_->GetRuntime() != aibench::CPU)));
+              return lhs->model_name_ < rhs->model_name_ ||
+                     (lhs->model_name_ == rhs->model_name_ &&
+                      (lhs->executor_->GetFramework() <
+                           rhs->executor_->GetFramework() ||
+                       (lhs->executor_->GetFramework() ==
+                            rhs->executor_->GetFramework() &&
+                        lhs->executor_->GetRuntime() != aibench::CPU)));
             });
 
   // Internal perf regression tools depends on the output formatting,
@@ -122,25 +126,25 @@ Status Benchmark::Run(const char *model_name, const char *framework,
     if (strcmp(runtime, "all") != 0 &&
         ParseRuntime(runtime) != b->executor_->GetRuntime())
       continue;
-    double init_seconds, run_seconds;
-    printf("benchmarking:%s,%d,%d\n",
-           b->model_name_.c_str(),
-           b->executor_->GetFramework(),
-           b->executor_->GetRuntime());
-    Status status = b->Run(&init_seconds, &run_seconds, num_threads);
+
+    // sleep run_interval seconds to cool off the target
+    printf("sleep %d seconds to cool off the target\n", run_interval);
+    sleep(static_cast<uint32_t>(run_interval));
+
+    double init_ms, run_ms;
+    printf("benchmarking: %s,%d,%d\n", b->model_name_.c_str(),
+           b->executor_->GetFramework(), b->executor_->GetRuntime());
+    Status status = b->Run(&init_ms, &run_ms, num_threads);
     if (status != SUCCESS) {
       res = status;
+      printf("benchmark failed: %s,%d,%d\n", b->model_name_.c_str(),
+             b->executor_->GetFramework(), b->executor_->GetRuntime());
       continue;
     }
     // model_name,framework,runtime,init time,inference time
-    printf("benchmark:%s,%d,%d,%.3f,%.3f\n",
-           b->model_name_.c_str(),
-           b->executor_->GetFramework(),
-           b->executor_->GetRuntime(),
-           init_seconds * 1000,
-           run_seconds * 1000);
-    // sleep run_interval seconds to cool off the target
-    sleep(static_cast<uint32_t>(run_interval));
+    printf("benchmark: %s,%d,%d,%.3f,%.3f\n", b->model_name_.c_str(),
+           b->executor_->GetFramework(), b->executor_->GetRuntime(), init_ms,
+           run_ms);
   }
   return res;
 }
@@ -150,23 +154,28 @@ void Benchmark::Register() {
   all_benchmarks->push_back(this);
 }
 
-Status Benchmark::Run(double *init_seconds, double *run_seconds,
-                      int num_threads) {
-  static const int64_t kMinIters = 10;
-  static const int64_t kMaxIters = 1000000000;
-  static const double kMinTime = 2;
-  int64_t iters = kMinIters;
+Status Benchmark::Run(double *init_ms, double *run_ms, int num_threads) {
+  static const int64_t kMinIters = 5;
+  static const int64_t kMaxIters = 20;
+  static const double kMinTime = 2000000;  // microseconds
+  static const float quantile = 0.8;
   int64_t start_time, end_time;
   Status status;
   // Init the target's environment
   status = executor_->Init(model_file_.c_str(), num_threads);
-  if (status != SUCCESS) return status;
+  if (status != SUCCESS) {
+    executor_->Finish();
+    return status;
+  }
   // prepare
   start_time = NowMicros();
   status = executor_->Prepare(model_file_.c_str());
   end_time = NowMicros();
-  *init_seconds = (end_time - start_time) * 1e-6;
-  if (status != SUCCESS) return status;
+  *init_ms = (end_time - start_time) * 1e-3;
+  if (status != SUCCESS) {
+    executor_->Finish();
+    return status;
+  }
   // warm-up
   std::map<std::string, BaseTensor> inputs;
   std::map<std::string, BaseTensor> outputs;
@@ -202,28 +211,46 @@ Status Benchmark::Run(double *init_seconds, double *run_seconds,
                                              std::default_delete<float[]>());
     outputs[output_names_[i]] = BaseTensor(output_shapes_[i], buffer_out);
   }
-  for (int i = 0; i < 5; ++i) {
+
+  for (int i = 0; i < 2; ++i) {
     status = executor_->Run(inputs, &outputs);
   }
-  if (status != SUCCESS) return status;
-  while (true) {
+  if (status != SUCCESS) {
+    executor_->Finish();
+    return status;
+  }
+
+  std::vector<int64_t> durations;
+  int64_t total_duration = 0;
+  size_t benchmark_iters = 0;
+
+  for (int i = 0; i < kMinIters || (total_duration < kMinTime && i < kMaxIters);
+       ++i) {
     start_time = NowMicros();
-    for (int i = 0; i < iters; ++i) {
-      executor_->Run(inputs, &outputs);
-    }
+    status = executor_->Run(inputs, &outputs);
     end_time = NowMicros();
-    const double seconds = (end_time - start_time) * 1e-6;
-    if (seconds >= kMinTime || iters >= kMaxIters) {
-      *run_seconds = seconds / iters;
+    durations.push_back(end_time - start_time);
+    total_duration += durations.back();
+    if (status != SUCCESS) {
       executor_->Finish();
-      return SUCCESS;
+      return status;
     }
-
-    // Update number of iterations.
-    // Overshoot by 100% in an attempt to succeed the next time.
-    double multiplier = 2.0 * kMinTime / std::max(seconds, 1e-9);
-    iters = std::min<int64_t>(multiplier * iters, kMaxIters);
+    ++benchmark_iters;
   }
+
+  std::sort(durations.begin(), durations.end());
+
+  size_t valid_iters = std::max(
+      static_cast<size_t>(1), static_cast<size_t>(benchmark_iters * quantile));
+  size_t start_iter = (benchmark_iters - valid_iters) / 2;
+  valid_iters = std::min(valid_iters, benchmark_iters - start_iter);
+  total_duration = std::accumulate(
+      durations.begin() + start_iter,
+      durations.begin() + (start_iter + valid_iters), int64_t{0});
+
+  *run_ms = total_duration * 1e-3 / valid_iters;
+  executor_->Finish();
+  return SUCCESS;
 }
 
 int64_t NowMicros() {
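
Reviewer note, not part of the diff: the rewritten Benchmark::Run above
replaces the old grow-the-iteration-count loop with a bounded number of
timed iterations followed by a trimmed mean. Below is a minimal Python
sketch of that strategy, assuming the same constants (at least 5 and at
most 20 iterations, or 2 s of total run time, keeping the middle 80% of
sorted durations); warm-up and error handling are omitted, and run_once
is a placeholder for a single executor run.

import time


def timed_benchmark(run_once, min_iters=5, max_iters=20,
                    min_time_s=2.0, quantile=0.8):
    """Average the middle quantile share of sorted per-run durations."""
    durations = []
    total = 0.0
    i = 0
    # Same loop bound as the C++ code: at least min_iters runs, then keep
    # going until min_time_s of total run time or max_iters runs.
    while i < min_iters or (total < min_time_s and i < max_iters):
        start = time.time()
        run_once()
        durations.append(time.time() - start)
        total += durations[-1]
        i += 1
    # Drop the fastest and slowest outliers, then average what remains.
    durations.sort()
    valid = max(1, int(len(durations) * quantile))
    start_idx = (len(durations) - valid) // 2
    valid = min(valid, len(durations) - start_idx)
    kept = durations[start_idx:start_idx + valid]
    return 1000.0 * sum(kept) / len(kept)  # trimmed-mean latency in ms

Calling timed_benchmark with whatever single-inference callable is being
measured (e.g. a lambda wrapping one executor run) returns the value the
C++ code reports as run_ms.
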
diff --git a/aibench/executors/BUILD b/aibench/executors/BUILD
index fc8fd29..9510250 100644
--- a/aibench/executors/BUILD
+++ b/aibench/executors/BUILD
@@ -106,7 +106,6 @@ cc_library(
     hdrs = [
         "tflite/tflite_executor.h",
     ],
-
     deps = [
         ":base_executor",
     ] + if_android_armv7([
diff --git a/aibench/executors/snpe/snpe_executor.cc b/aibench/executors/snpe/snpe_executor.cc
index 91b96c9..206d1bd 100644
--- a/aibench/executors/snpe/snpe_executor.cc
+++ b/aibench/executors/snpe/snpe_executor.cc
@@ -71,13 +71,12 @@ Status ProcessInput(zdl::SNPE::SNPE *snpe,
     std::cerr << "inputs size not matched" << std::endl;
     return Status::RUNTIME_ERROR;
   }
-  std::unique_ptr<zdl::DlSystem::ITensor> input_tensor;
   for (size_t i = 0; i < input_tensor_names.size(); i++) {
     std::string input_name(input_tensor_names.at(i));
     const auto &input_shape_opt =
         snpe->getInputDimensions(input_tensor_names.at(i));
     const auto &input_shape = *input_shape_opt;
-    input_tensor =
+    std::unique_ptr<zdl::DlSystem::ITensor> input_tensor =
         zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(input_shape);
     size_t input_size = inputs.at(input_name).size();
 
@@ -136,25 +135,34 @@ Status SnpeExecutor::Prepare(const char *model_name) {
 
 Status SnpeExecutor::Run(const std::map<std::string, BaseTensor> &inputs,
                          std::map<std::string, BaseTensor> *outputs) {
-  Status status;
+  Status status = SUCCESS;
+
+  zdl::DlSystem::TensorMap input_tensor_map;
+  zdl::DlSystem::TensorMap output_tensor_map;
+
   // step1: prepare inputs
-  input_tensor_map_.clear();
-  status = ProcessInput(snpe_.get(), inputs, &input_tensor_map_);
+  status = ProcessInput(snpe_.get(), inputs, &input_tensor_map);
   if (status != Status::SUCCESS) return status;
 
   // step2: execute
-  output_tensor_map_.clear();
-  snpe_.get()->execute(input_tensor_map_, output_tensor_map_);
+  snpe_.get()->execute(input_tensor_map, output_tensor_map);
 
   // step3: process output
-  status = ProcessOutput(output_tensor_map_, outputs);
+  status = ProcessOutput(output_tensor_map, outputs);
+
+  auto tensor_names = input_tensor_map.getTensorNames();
+  for (size_t i = 0; i < tensor_names.size(); ++i) {
+    std::string input_name(tensor_names.at(i));
+    zdl::DlSystem::ITensor *input_tensor =
+        input_tensor_map.getTensor(input_name.c_str());
+    delete input_tensor;
+  }
+
   return status;
 }
 
 void SnpeExecutor::Finish() {
   if (snpe_ != nullptr) snpe_.reset();
-  input_tensor_map_.clear();
-  output_tensor_map_.clear();
 }
 
 }  // namespace aibench
diff --git a/aibench/executors/snpe/snpe_executor.h b/aibench/executors/snpe/snpe_executor.h
index c06bce1..94f1b4f 100644
--- a/aibench/executors/snpe/snpe_executor.h
+++ b/aibench/executors/snpe/snpe_executor.h
@@ -38,8 +38,6 @@ class SnpeExecutor : public BaseExecutor {
   virtual void Finish();
  private:
   std::unique_ptr<zdl::SNPE::SNPE> snpe_;
-  zdl::DlSystem::TensorMap input_tensor_map_;
-  zdl::DlSystem::TensorMap output_tensor_map_;
 };
 
 }  // namespace aibench
diff --git a/tools/bazel.rc b/tools/bazel.rc
index 267345d..93a9b8f 100644
--- a/tools/bazel.rc
+++ b/tools/bazel.rc
@@ -1,5 +1,8 @@
 build --verbose_failures
 build --copt=-std=c++11
+build --copt=-O3
+build --copt=-ffast-math
+build --copt=-Ofast
 build --strategy=CppCompile=standalone
 
 # By default, we don't distinct target and host platfroms.
diff --git a/tools/benchmark.py b/tools/benchmark.py
index ba607bc..ada01fa 100644
--- a/tools/benchmark.py
+++ b/tools/benchmark.py
@@ -185,6 +185,7 @@ def main(unused_args):
     all_prepare = []
     all_run_avg = []
     for target_abi in target_abis:
+        print("Preparing to run models on %s" % target_abi)
         if target_abi not in abi_types:
             print("Not supported abi: %s" % target_abi)
             continue
diff --git a/tools/google-format.sh b/tools/google-format.sh
new file mode 100755
index 0000000..6d8bf9d
--- /dev/null
+++ b/tools/google-format.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+clang-format-3.9 \
+  -style="{BasedOnStyle: google,    \
+           DerivePointerAlignment: false, \
+           PointerAlignment: Right, \
+           BinPackParameters: false}" -i "$1"
diff --git a/tools/model_list.py b/tools/model_list.py
new file mode 100644
index 0000000..e87e2c4
--- /dev/null
+++ b/tools/model_list.py
@@ -0,0 +1,22 @@
+# Copyright 2018 Xiaomi, Inc.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+BENCHMARK_MODELS = (
+    "MobileNetV1",
+    "MobileNetV2",
+    "SqueezeNetV11",
+    "InceptionV3",
+    "VGG16"
+)
diff --git a/tools/power.sh b/tools/power.sh
new file mode 100755
index 0000000..e9b34c5
--- /dev/null
+++ b/tools/power.sh
@@ -0,0 +1,111 @@
+SERIALNO=$1
+PLATFORM=$2
+ADB="adb -s $SERIALNO"
+
+echo "Adjusting power to performance mode on $SERIALNO, $PLATFORM"
+
+$ADB root || exit 1
+$ADB wait-for-device
+$ADB remount
+$ADB wait-for-device
+
+$ADB shell "stop thermald"
+$ADB shell "stop mpdecision"
+# disable thermal
+$ADB shell "stop thermal-engine && stop thermal-hal-1-0"
+# stop perflock HAL
+$ADB shell "stop perf-hal-1-0"
+
+# boost cpu freq
+$ADB shell "echo 1 > /sys/devices/system/cpu/cpu0/online"
+$ADB shell "echo 1 > /sys/devices/system/cpu/cpu1/online"
+$ADB shell "echo 1 > /sys/devices/system/cpu/cpu2/online"
+$ADB shell "echo 1 > /sys/devices/system/cpu/cpu3/online"
+$ADB shell "echo 1 > /sys/devices/system/cpu/cpu4/online"
+$ADB shell "echo 1 > /sys/devices/system/cpu/cpu5/online"
+$ADB shell "echo 1 > /sys/devices/system/cpu/cpu6/online"
+$ADB shell "echo 1 > /sys/devices/system/cpu/cpu7/online"
+
+$ADB shell "echo performance > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor"
+$ADB shell "echo performance > /sys/devices/system/cpu/cpu1/cpufreq/scaling_governor"
+$ADB shell "echo performance > /sys/devices/system/cpu/cpu2/cpufreq/scaling_governor"
+$ADB shell "echo performance > /sys/devices/system/cpu/cpu3/cpufreq/scaling_governor"
+$ADB shell "echo performance > /sys/devices/system/cpu/cpu4/cpufreq/scaling_governor"
+$ADB shell "echo performance > /sys/devices/system/cpu/cpu5/cpufreq/scaling_governor"
+$ADB shell "echo performance > /sys/devices/system/cpu/cpu6/cpufreq/scaling_governor"
+$ADB shell "echo performance > /sys/devices/system/cpu/cpu7/cpufreq/scaling_governor"
+
+# bw vote max
+$ADB shell "echo performance > /sys/class/devfreq/1d84000.ufshc/governor"
+$ADB shell "echo performance > /sys/class/devfreq/5000000.qcom,kgsl-3d0/governor"
+$ADB shell "echo performance > /sys/class/devfreq/aa00000.qcom,vidc:arm9_bus_ddr/governor"
+$ADB shell "echo performance > /sys/class/devfreq/aa00000.qcom,vidc:bus_cnoc/governor"
+$ADB shell "echo performance > /sys/class/devfreq/aa00000.qcom,vidc:venus_bus_ddr/governor"
+$ADB shell "echo performance > /sys/class/devfreq/aa00000.qcom,vidc:venus_bus_llcc/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,cpubw/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,gpubw/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,kgsl-busmon/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,l3-cdsp/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,l3-cpu0/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,l3-cpu4/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,llccbw/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,memlat-cpu0/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,memlat-cpu4/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,mincpubw/governor"
+$ADB shell "echo performance > /sys/class/devfreq/soc:qcom,snoc_cnoc_keepalive/governor"
+
+# boost gpu freq
+$ADB shell "echo 0 > /sys/class/kgsl/kgsl-3d0/min_pwrlevel"
+$ADB shell "echo 0 > /sys/class/kgsl/kgsl-3d0/max_pwrlevel"
+$ADB shell "echo performance > /sys/class/kgsl/kgsl-3d0/devfreq/governor"
+$ADB shell "cat /sys/class/kgsl/kgsl-3d0/gpuclk"
+$ADB shell "echo 1000000 > /sys/class/kgsl/kgsl-3d0/idle_timer"
+$ADB shell "echo 1 > /d/dri/0/debug/core_perf/perf_mode"
+
+
+$ADB shell "echo 4 > /sys/devices/system/cpu/cpu0/core_ctl/min_cpus"
+$ADB shell "echo 4 > /sys/devices/system/cpu/cpu4/core_ctl/min_cpus"
+$ADB shell "echo 35 > /proc/sys/kernel/sched_downmigrate && echo 55 > /proc/sys/kernel/sched_upmigrate"
+$ADB shell "echo 512 > /sys/block/sda/queue/nr_requests && echo 1024 > /sys/block/sda/queue/read_ahead_kb"
+
+#$ADB shell "echo 100 > /proc/sys/kernel/sched_cfs_boost"
+$ADB shell "echo 100 > /dev/stune/top-app/schedtune.boost"
+$ADB shell "echo 1 > /dev/stune/top-app/schedtune.prefer_idle"
+
+# disable all level LPM by sysfs node
+$ADB shell "echo Y > /sys/module/lpm_levels/parameters/sleep_disabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu0/pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu0/rail-pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu1/pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu1/rail-pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu2/pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu2/rail-pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu3/pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu3/rail-pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu4/pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu4/rail-pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu5/pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu5/rail-pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu6/pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu6/rail-pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu7/pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/cpu7/rail-pc/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/l3-wfi/idle_enabled"
+$ADB shell "echo N > /sys/module/lpm_levels/L3/llcc-off/idle_enabled"
+if [ "$PLATFORM" == "sdm660" ]; then
+	$ADB shell "echo N > /sys/module/lpm_levels/system/perf/cpu4/pc/idle_enabled"
+	$ADB shell "echo N > /sys/module/lpm_levels/system/perf/cpu5/pc/idle_enabled"
+	$ADB shell "echo N > /sys/module/lpm_levels/system/perf/cpu6/pc/idle_enabled"
+	$ADB shell "echo N > /sys/module/lpm_levels/system/perf/cpu7/pc/idle_enabled"
+fi
+
+# set ddr config.
+$ADB shell "echo 100 > /proc/sys/kernel/sched_initial_task_util"
+if [ "$PLATFORM" == "sdm660" ]; then
+	$ADB shell "echo 100 > /proc/sys/kernel/sched_init_task_load"  # for 660
+	$ADB shell "echo 1 >/sys/kernel/debug/msm-bus-dbg/shell-client/mas"
+	$ADB shell "echo 512 > /sys/kernel/debug/msm-bus-dbg/shell-client/slv"
+	$ADB shell "echo 28864000000 > /sys/kernel/debug/msm-bus-dbg/shell-client/ab"
+	$ADB shell "echo 28864000000 > /sys/kernel/debug/msm-bus-dbg/shell-client/ib"
+	$ADB shell "echo 1 > /sys/kernel/debug/msm-bus-dbg/shell-client/update_request"
+fi
\ No newline at end of file
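
Reviewer note, not part of the diff: tools/power.sh takes the adb serial
number as $1 and the board platform (for example sdm660) as $2. Below is
a minimal sketch of driving it standalone with the sh module, mirroring
the call added to adb_run further down; the serial number and platform
values are made-up placeholders.

import sh

serialno = "0123456789ABCDEF"  # placeholder serial from adb devices
platform = "sdm660"            # placeholder ro.board.platform value

try:
    # Best-effort tuning: keep benchmarking even if the script fails,
    # matching the try/except added in sh_commands.adb_run below.
    sh.bash("tools/power.sh", serialno, platform, _fg=True)
except Exception as e:
    print("Failed to configure power settings: %s" % str(e))
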
diff --git a/tools/sh_commands.py b/tools/sh_commands.py
index 237257f..425a8c8 100644
--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -19,6 +19,8 @@
 import sh
 import urllib
 
+from model_list import BENCHMARK_MODELS
+
 
 FRAMEWORKS = (
     "MACE",
@@ -309,6 +311,13 @@ def adb_run(abi,
         print("Run on device: %s, %s, %s" %
               (serialno, props["ro.board.platform"],
                props["ro.product.model"]))
+        try:
+            sh.bash("tools/power.sh",
+                    serialno, props["ro.board.platform"],
+                    _fg=True)
+        except Exception as e:
+            print("Failed to configure power settings: %s" % str(e))
+
         sh.adb("-s", serialno, "shell", "mkdir -p %s" % device_bin_path)
         sh.adb("-s", serialno, "shell", "rm -rf %s"
                % os.path.join(device_bin_path, "interior"))
@@ -326,11 +335,17 @@ def adb_run(abi,
         cmd = "cd %s; ADSP_LIBRARY_PATH='.;/system/lib/rfsa/adsp;/system" \
               "/vendor/lib/rfsa/adsp;/dsp'; LD_LIBRARY_PATH=. " \
               "./model_benchmark" % device_bin_path
-        if set(frameworks) == set(FRAMEWORKS):
-            frameworks = ["all"]
-        for framework in frameworks:
-            for runtime in runtimes:
+        if frameworks == ['all']:
+            frameworks = FRAMEWORKS
+        if runtimes == ['all']:
+            runtimes = RUNTIMES
+        if model_names == ['all']:
+            model_names = BENCHMARK_MODELS
+
+        for runtime in runtimes:
+            for framework in frameworks:
                 for model_name in model_names:
+                    print("Run: %s, %s, %s" % (framework, runtime, model_name))
                     args = "--run_interval=%d --num_threads=%d " \
                            "--framework=%s --runtime=%s --model_name=%s " \
                            "--product_soc=%s.%s" % \