sort benchmarks in python code
李寅 committed Jul 12, 2018
1 parent 3d608e4 commit 21d65fb
Showing 10 changed files with 266 additions and 75 deletions.
143 changes: 85 additions & 58 deletions aibench/benchmark/benchmark.cc
@@ -75,38 +75,42 @@ Benchmark::Benchmark(BaseExecutor *executor,
       input_shapes_(input_shapes),
       output_names_(output_names),
       output_shapes_(output_shapes) {
-  if (input_names.size() != input_shapes.size()
-      || (input_files.size() != input_shapes.size() && input_files.size() > 0)
-      || output_names.size() != output_shapes.size()) {
-    printf("size of input_names(%d), input_files(%d) and input_shapes(%d) "
-           "should be equal. sizeof output_names(%d) and output_shapes(%d) "
-           "should be equal.\n",
-           static_cast<int>(input_names.size()),
-           static_cast<int>(input_files.size()),
-           static_cast<int>(input_shapes.size()),
-           static_cast<int>(output_names.size()),
-           static_cast<int>(output_shapes.size()));
+  if (input_names.size() != input_shapes.size() ||
+      (input_files.size() != input_shapes.size() && input_files.size() > 0) ||
+      output_names.size() != output_shapes.size()) {
+    printf(
+        "size of input_names(%d), input_files(%d) and input_shapes(%d) "
+        "should be equal. sizeof output_names(%d) and output_shapes(%d) "
+        "should be equal.\n",
+        static_cast<int>(input_names.size()),
+        static_cast<int>(input_files.size()),
+        static_cast<int>(input_shapes.size()),
+        static_cast<int>(output_names.size()),
+        static_cast<int>(output_shapes.size()));
     abort();
   }
   Register();
 }
 
 // Run all benchmarks filtered by model_name
-Status Benchmark::Run(const char *model_name, const char *framework,
-                      const char *runtime, int run_interval, int num_threads) {
+Status Benchmark::Run(const char *model_name,
+                      const char *framework,
+                      const char *runtime,
+                      int run_interval,
+                      int num_threads) {
   if (!all_benchmarks) return SUCCESS;
 
   // sort by model name, framework and runtime
   // the compare function tends to shuffle benchmarks by runtime
   std::sort(all_benchmarks->begin(), all_benchmarks->end(),
             [](const Benchmark *lhs, const Benchmark *rhs) {
-              return lhs->model_name_ < rhs->model_name_
-                  || (lhs->model_name_ == rhs->model_name_
-                      && (lhs->executor_->GetFramework()
-                          < rhs->executor_->GetFramework() || (
-                          lhs->executor_->GetFramework()
-                          == rhs->executor_->GetFramework()
-                          && lhs->executor_->GetRuntime() != aibench::CPU)));
+              return lhs->model_name_ < rhs->model_name_ ||
+                     (lhs->model_name_ == rhs->model_name_ &&
+                      (lhs->executor_->GetFramework() <
+                           rhs->executor_->GetFramework() ||
+                       (lhs->executor_->GetFramework() ==
+                            rhs->executor_->GetFramework() &&
+                        lhs->executor_->GetRuntime() != aibench::CPU)));
             });
 
   // Internal perf regression tools depends on the output formatting,
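The comparator above sorts by model name, then framework; its final tiebreak reads only the left-hand operand's runtime, so within an equal model/framework group it merely tends to push non-CPU runtimes ahead of CPU ones, as the comment says. A rough Python sketch of the intended grouping, expressed as a sort key instead of a comparator (the Benchmark record and runtime constants here are illustrative, not from this repo):

# Illustrative sketch only: group benchmarks by model name, then framework,
# with non-CPU runtimes ordered ahead of CPU within each group.
from collections import namedtuple

CPU, GPU, DSP = 0, 1, 2  # stand-ins for the aibench runtime enum
Benchmark = namedtuple("Benchmark", ["model_name", "framework", "runtime"])

benchmarks = [
    Benchmark("MobileNetV1", 0, CPU),
    Benchmark("InceptionV3", 1, DSP),
    Benchmark("MobileNetV1", 0, GPU),
]

# runtime == CPU yields False (0) for non-CPU entries, sorting them first;
# unlike the C++ lambda, this key is a strict ordering, so no shuffling.
benchmarks.sort(key=lambda b: (b.model_name, b.framework, b.runtime == CPU))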
@@ -122,25 +126,25 @@ Status Benchmark::Run(const char *model_name, const char *framework,
     if (strcmp(runtime, "all") != 0 &&
         ParseRuntime(runtime) != b->executor_->GetRuntime())
       continue;
-    double init_seconds, run_seconds;
-    printf("benchmarking:%s,%d,%d\n",
-           b->model_name_.c_str(),
-           b->executor_->GetFramework(),
-           b->executor_->GetRuntime());
-    Status status = b->Run(&init_seconds, &run_seconds, num_threads);
-
-    // sleep run_interval seconds to cool off the target
-    printf("sleep %d\n", run_interval);
-    sleep(static_cast<uint32_t>(run_interval));
-
+    double init_ms, run_ms;
+    printf("benchmarking: %s,%d,%d\n", b->model_name_.c_str(),
+           b->executor_->GetFramework(), b->executor_->GetRuntime());
+    Status status = b->Run(&init_ms, &run_ms, num_threads);
     if (status != SUCCESS) {
       res = status;
       printf("benchmark failed: %s,%d,%d\n", b->model_name_.c_str(),
              b->executor_->GetFramework(), b->executor_->GetRuntime());
       continue;
     }
     // model_name,framework,runtime,init time,inference time
-    printf("benchmark:%s,%d,%d,%.3f,%.3f\n",
-           b->model_name_.c_str(),
-           b->executor_->GetFramework(),
-           b->executor_->GetRuntime(),
-           init_seconds * 1000,
-           run_seconds * 1000);
+    // sleep run_interval seconds to cool off the target
+    sleep(static_cast<uint32_t>(run_interval));
+    printf("benchmark: %s,%d,%d,%.3f,%.3f\n", b->model_name_.c_str(),
+           b->executor_->GetFramework(), b->executor_->GetRuntime(), init_ms,
+           run_ms);
   }
   return res;
 }
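The context line above notes that internal perf regression tools depend on this output formatting. A minimal Python sketch of such a consumer, assuming only the "benchmark: %s,%d,%d,%.3f,%.3f" line format printed above:

# Sketch of a parser for the result lines printed above, format:
# "benchmark: <model_name>,<framework>,<runtime>,<init_ms>,<run_ms>"
def parse_benchmark_line(line):
    if not line.startswith("benchmark:"):
        return None  # skips "benchmarking: ..." and "benchmark failed: ..."
    model_name, framework, runtime, init_ms, run_ms = \
        line[len("benchmark:"):].strip().split(",")
    return (model_name, int(framework), int(runtime),
            float(init_ms), float(run_ms))

print(parse_benchmark_line("benchmark: MobileNetV1,0,0,12.345,6.789"))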
@@ -150,23 +154,28 @@ void Benchmark::Register() {
   all_benchmarks->push_back(this);
 }
 
-Status Benchmark::Run(double *init_seconds, double *run_seconds,
-                      int num_threads) {
-  static const int64_t kMinIters = 10;
-  static const int64_t kMaxIters = 1000000000;
-  static const double kMinTime = 2;
-  int64_t iters = kMinIters;
+Status Benchmark::Run(double *init_ms, double *run_ms, int num_threads) {
+  static const int64_t kMinIters = 5;
+  static const int64_t kMaxIters = 20;
+  static const double kMinTime = 2000000;  // microseconds
+  static const float quantile = 0.8;
   int64_t start_time, end_time;
   Status status;
   // Init the target's environment
   status = executor_->Init(model_file_.c_str(), num_threads);
-  if (status != SUCCESS) return status;
+  if (status != SUCCESS) {
+    executor_->Finish();
+    return status;
+  }
   // prepare
   start_time = NowMicros();
   status = executor_->Prepare(model_file_.c_str());
   end_time = NowMicros();
-  *init_seconds = (end_time - start_time) * 1e-6;
-  if (status != SUCCESS) return status;
+  *init_ms = (end_time - start_time) * 1e-3;
+  if (status != SUCCESS) {
+    executor_->Finish();
+    return status;
+  }
   // warm-up
   std::map<std::string, BaseTensor> inputs;
   std::map<std::string, BaseTensor> outputs;
@@ -202,28 +211,46 @@ Status Benchmark::Run(double *init_seconds, double *run_seconds,
                                  std::default_delete<float[]>());
     outputs[output_names_[i]] = BaseTensor(output_shapes_[i], buffer_out);
   }
-  for (int i = 0; i < 5; ++i) {
+
+  for (int i = 0; i < 2; ++i) {
     status = executor_->Run(inputs, &outputs);
   }
-  if (status != SUCCESS) return status;
-  while (true) {
+  if (status != SUCCESS) {
+    executor_->Finish();
+    return status;
+  }
+
+  std::vector<int64_t> durations;
+  int64_t total_duration = 0;
+  size_t benchmark_iters = 0;
+
+  for (int i = 0; i < kMinIters || (total_duration < kMinTime && i < kMaxIters);
+       ++i) {
     start_time = NowMicros();
-    for (int i = 0; i < iters; ++i) {
-      executor_->Run(inputs, &outputs);
-    }
+    status = executor_->Run(inputs, &outputs);
     end_time = NowMicros();
-    const double seconds = (end_time - start_time) * 1e-6;
-    if (seconds >= kMinTime || iters >= kMaxIters) {
-      *run_seconds = seconds / iters;
-      return SUCCESS;
+    durations.push_back(end_time - start_time);
+    total_duration += durations.back();
+    if (status != SUCCESS) {
+      executor_->Finish();
+      return status;
     }
-
-    // Update number of iterations.
-    // Overshoot by 100% in an attempt to succeed the next time.
-    double multiplier = 2.0 * kMinTime / std::max(seconds, 1e-9);
-    iters = std::min<int64_t>(multiplier * iters, kMaxIters);
+    ++benchmark_iters;
   }
+
+  std::sort(durations.begin(), durations.end());
+
+  size_t valid_iters = std::max(
+      static_cast<size_t>(1), static_cast<size_t>(benchmark_iters * quantile));
+  size_t start_iter = (benchmark_iters - valid_iters) / 2;
+  valid_iters = std::min(valid_iters, benchmark_iters - start_iter);
+  total_duration =
+      std::accumulate(durations.begin() + start_iter,
+                      durations.begin() + (start_iter + valid_iters), 0);
+
+  *run_ms = total_duration * 1e-3 / valid_iters;
+  executor_->Finish();
+  return SUCCESS;
 }
 
 int64_t NowMicros() {
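The rewritten Benchmark::Run drops the old strategy of growing `iters` until a single timed batch exceeded two seconds. It now times every run individually for at least kMinIters = 5 and at most kMaxIters = 20 iterations (continuing past 5 only while the accumulated time is below kMinTime = 2,000,000 microseconds), then sorts the durations and averages the middle 80%. A Python sketch of the same trimmed-mean statistic, with made-up durations:

# Sketch of the trimmed mean computed above: sort per-run durations
# (microseconds), keep the middle `quantile` fraction, average, report ms.
# The duration values below are made up.
QUANTILE = 0.8

def run_ms(durations_us):
    durations = sorted(durations_us)
    n = len(durations)
    valid = max(1, int(n * QUANTILE))
    start = (n - valid) // 2
    valid = min(valid, n - start)
    return sum(durations[start:start + valid]) * 1e-3 / valid

# The two tail outliers (30 ms and 120 ms) are trimmed away, leaving 50.0.
print(run_ms([50000, 50000, 30000, 50000, 50000, 120000, 50000, 50000]))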
1 change: 0 additions & 1 deletion aibench/executors/BUILD
@@ -106,7 +106,6 @@ cc_library(
     hdrs = [
         "tflite/tflite_executor.h",
     ],
-
     deps = [
         ":base_executor",
     ] + if_android_armv7([
28 changes: 18 additions & 10 deletions aibench/executors/snpe/snpe_executor.cc
@@ -71,13 +71,12 @@ Status ProcessInput(zdl::SNPE::SNPE *snpe,
     std::cerr << "inputs size not matched" << std::endl;
     return Status::RUNTIME_ERROR;
   }
-  std::unique_ptr<zdl::DlSystem::ITensor> input_tensor;
   for (size_t i = 0; i < input_tensor_names.size(); i++) {
     std::string input_name(input_tensor_names.at(i));
     const auto &input_shape_opt =
         snpe->getInputDimensions(input_tensor_names.at(i));
     const auto &input_shape = *input_shape_opt;
-    input_tensor =
+    std::unique_ptr<zdl::DlSystem::ITensor> input_tensor =
         zdl::SNPE::SNPEFactory::getTensorFactory().createTensor(input_shape);
     size_t input_size = inputs.at(input_name).size();
 
@@ -136,25 +135,34 @@ Status SnpeExecutor::Prepare(const char *model_name) {
 
 Status SnpeExecutor::Run(const std::map<std::string, BaseTensor> &inputs,
                          std::map<std::string, BaseTensor> *outputs) {
-  Status status;
+  Status status = SUCCESS;
+
+  zdl::DlSystem::TensorMap input_tensor_map;
+  zdl::DlSystem::TensorMap output_tensor_map;
+
   // step1: prepare inputs
-  input_tensor_map_.clear();
-  status = ProcessInput(snpe_.get(), inputs, &input_tensor_map_);
+  status = ProcessInput(snpe_.get(), inputs, &input_tensor_map);
   if (status != Status::SUCCESS) return status;
 
   // step2: execute
-  output_tensor_map_.clear();
-  snpe_.get()->execute(input_tensor_map_, output_tensor_map_);
+  snpe_.get()->execute(input_tensor_map, output_tensor_map);
 
   // step3: process output
-  status = ProcessOutput(output_tensor_map_, outputs);
+  status = ProcessOutput(output_tensor_map, outputs);
+
+  auto tensor_names = input_tensor_map.getTensorNames();
+  for (size_t i = 0; i < tensor_names.size(); ++i) {
+    std::string input_name(tensor_names.at(i));
+    zdl::DlSystem::ITensor* input_tensor =
+        input_tensor_map.getTensor(input_name.c_str());
+    delete input_tensor;
+  }
+
   return status;
 }
 
 void SnpeExecutor::Finish() {
   if (snpe_ != nullptr) snpe_.reset();
-  input_tensor_map_.clear();
-  output_tensor_map_.clear();
 }
 
 }  // namespace aibench
2 changes: 0 additions & 2 deletions aibench/executors/snpe/snpe_executor.h
@@ -38,8 +38,6 @@ class SnpeExecutor : public BaseExecutor {
   virtual void Finish();
  private:
   std::unique_ptr<zdl::SNPE::SNPE> snpe_;
-  zdl::DlSystem::TensorMap input_tensor_map_;
-  zdl::DlSystem::TensorMap output_tensor_map_;
 };
 
 }  // namespace aibench
3 changes: 3 additions & 0 deletions tools/bazel.rc
@@ -1,5 +1,8 @@
 build --verbose_failures
 build --copt=-std=c++11
 build --copt=-O3
+build --copt=-ffast-math
+build --copt=-Ofast
+build --strategy=CppCompile=standalone
 
 # By default, we don't distinct target and host platfroms.
1 change: 1 addition & 0 deletions tools/benchmark.py
@@ -185,6 +185,7 @@ def main(unused_args):
     all_prepare = []
     all_run_avg = []
     for target_abi in target_abis:
+        print("Prepare to run models on %s" % target_abi)
         if target_abi not in abi_types:
             print("Not supported abi: %s" % target_abi)
             continue
7 changes: 7 additions & 0 deletions tools/google-format.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+clang-format-3.9 \
+-style="{BasedOnStyle: google, \
+DerivePointerAlignment: false, \
+PointerAlignment: Right, \
+BinPackParameters: false}" -i $1
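Presumably the script is invoked once per source file, e.g. "tools/google-format.sh aibench/benchmark/benchmark.cc"; note that the unquoted $1 means paths containing spaces would need quoting.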
22 changes: 22 additions & 0 deletions tools/model_list.py
@@ -0,0 +1,22 @@
+# Copyright 2018 Xiaomi, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+BENCHMARK_MODELS = (
+    "MobileNetV1",
+    "MobileNetV2",
+    "SqueezeNetV11",
+    "InceptionV3",
+    "VGG16"
+)
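Per the commit title, benchmark ordering now also lives in the Python tooling. A minimal sketch of how a fixed tuple like this can impose a stable run order (run_benchmark is a hypothetical callback, not a function from this repo):

# Hypothetical consumer of BENCHMARK_MODELS: iterating the tuple yields a
# stable, canonical model order for running and reporting benchmarks.
from model_list import BENCHMARK_MODELS

def run_all(run_benchmark):
    for model_name in BENCHMARK_MODELS:
        run_benchmark(model_name)

run_all(lambda name: print("would benchmark %s" % name))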
