// trt_gen_calib_table_test.cc (from a fork of PaddlePaddle/PaddleSlim)
#include <chrono>
#include <functional>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#include <gflags/gflags.h>
#include <glog/logging.h>

#include "paddle/include/paddle_inference_api.h"
using paddle::AnalysisConfig;
DEFINE_string(model_file, "", "Path of the inference model file.");
DEFINE_string(params_file, "", "Path of the inference params file.");
DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_int32(batch_size, 1, "Batch size.");
// Returns a random float uniformly distributed in [low, high).
float Random(float low, float high) {
  static std::random_device rd;
  static std::mt19937 mt(rd());
  std::uniform_real_distribution<float> dist(low, high);
  return dist(mt);
}
using Time = decltype(std::chrono::high_resolution_clock::now());

Time time() { return std::chrono::high_resolution_clock::now(); }

// Returns the elapsed time between t1 and t2 in milliseconds.
double time_diff(Time t1, Time t2) {
  typedef std::chrono::microseconds ms;
  auto diff = t2 - t1;
  ms counter = std::chrono::duration_cast<ms>(diff);
  return counter.count() / 1000.0;
}
std::unique_ptr<paddle::PaddlePredictor> CreatePredictor() {
  AnalysisConfig config;
  if (FLAGS_model_dir != "") {
    config.SetModel(FLAGS_model_dir);
  } else {
    config.SetModel(FLAGS_model_file, FLAGS_params_file);
  }
  // Allocate 500 MB of GPU memory on device 0 for the predictor.
  config.EnableUseGpu(500, 0);
  // We use ZeroCopy, so we set config.SwitchUseFeedFetchOps(false) here.
  config.SwitchUseFeedFetchOps(false);
  // Enable TensorRT with INT8 precision in calibration mode: running the
  // predictor collects activation ranges and generates the calibration table.
  config.EnableTensorRtEngine(1 << 30 /* workspace_size */,
                              FLAGS_batch_size /* max_batch_size */,
                              5 /* min_subgraph_size */,
                              AnalysisConfig::Precision::kInt8,
                              false /* use_static */,
                              true /* use_calib_mode */);
  return CreatePaddlePredictor(config);
}
void run(paddle::PaddlePredictor *predictor,
         std::vector<float> &input,
         const std::vector<int> &input_shape,
         std::vector<float> *out_data) {
  // out_data is unused here: calibration runs do not fetch outputs.
  int input_num = std::accumulate(input_shape.begin(), input_shape.end(), 1,
                                  std::multiplies<int>());
  for (size_t i = 0; i < 500; i++) {
    // We use random data here for example. Change this to real data in your
    // application.
    for (int j = 0; j < input_num; j++) {
      input[j] = Random(0, 1.0);
    }
    auto input_names = predictor->GetInputNames();
    auto input_t = predictor->GetInputTensor(input_names[0]);
    input_t->Reshape(input_shape);
    input_t->copy_from_cpu(input.data());
    // Run predictor to generate calibration table. Can be very time-consuming.
    CHECK(predictor->ZeroCopyRun());
  }
}
int main(int argc, char *argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto predictor = CreatePredictor();
  std::vector<int> input_shape = {FLAGS_batch_size, 3, 224, 224};
  // Init input as 1.0 here for example. You can also load preprocessed real
  // pictures to vectors as input.
  std::vector<float> input_data(FLAGS_batch_size * 3 * 224 * 224, 1.0);
  std::vector<float> out_data;
  run(predictor.get(), input_data, input_shape, &out_data);
  return 0;
}
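
// A minimal sketch of how this tool might be built and invoked. The output
// binary name, include/library paths, and library names below are assumptions
// for illustration; adjust them to match your Paddle inference library layout.
//
//   g++ trt_gen_calib_table_test.cc -o trt_gen_calib_table_test \
//       -I/path/to/paddle_inference -L/path/to/paddle_inference/lib \
//       -lpaddle_inference -lgflags -lglog
//
//   ./trt_gen_calib_table_test --model_dir=/path/to/model --batch_size=1
//
// For combined-format models, pass --model_file and --params_file instead of
// --model_dir, matching the DEFINE_string flags declared above.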