Skip to content

Commit

Permalink
Speedup C++ runtime performance using OpenMP.
Browse files Browse the repository at this point in the history
  • Loading branch information
bing-jian committed May 27, 2019
1 parent 8ab6701 commit 36fb490
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 32 deletions.
20 changes: 19 additions & 1 deletion C++/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,20 @@ IF(COMMAND cmake_policy)
cmake_policy(SET CMP0003 NEW)
ENDIF(COMMAND cmake_policy)

PROJECT(GMMREG )
PROJECT(GMMREG)
SET(CMAKE_CXX_STANDARD 11)
SET(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Set a default build type if none was specified
set(default_build_type "Release")
if(NOT CMAKE_BUILD_TYPE AND NOT WIN32)
message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
STRING "Choose the type of build." FORCE)
# Set the possible values of build type for cmake-gui
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release"
"MinSizeRel" "RelWithDebInfo")
endif()

# The CMake command FIND_PACKAGE(VXL) attempts to find VXL binary
# installation. CMake will look in the directory specified by the
Expand Down Expand Up @@ -78,6 +90,12 @@ ELSE(WIN32)
ADD_LIBRARY(port_ini ${INI_LIBRARY_TYPE} port_ini.c)
ENDIF(WIN32)

find_package(OpenMP)
if (OPENMP_FOUND)
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()

SET(GMMREG_API_SRCS
gmmreg_api.cpp
Expand Down
14 changes: 7 additions & 7 deletions C++/gmmreg_transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ int ThinPlateSplineTransform(const char* f_config) {
if (LoadMatrixFromTxt(f_scene, scene) < 0) {
return -1;
}
assert(scene.cols() == d);
// assert(scene.cols() == d);

char f_ctrl_pts[256] = {0};
GetPrivateProfileString(common_section, "ctrl_pts", NULL,
f_ctrl_pts, 256, f_config);
if (LoadMatrixFromTxt(f_ctrl_pts, ctrl_pts) < 0) {
return -1;
}
assert(ctrl_pts.cols() == d);
// assert(ctrl_pts.cols() == d);
int n = ctrl_pts.rows();

char f_source[256] = {0};
Expand All @@ -48,7 +48,7 @@ int ThinPlateSplineTransform(const char* f_config) {
if (LoadMatrixFromTxt(f_source, source) < 0) {
return -1;
}
assert(source.cols() == d);
// assert(source.cols() == d);
int m = source.rows();

vnl_matrix<double> affine, tps;
Expand All @@ -58,16 +58,16 @@ int ThinPlateSplineTransform(const char* f_config) {
if (LoadMatrixFromTxt(f_init_affine, affine) < 0) {
return -1;
}
assert(affine.cols() == d);
assert(affine.rows() == d+1);
// assert(affine.cols() == d);
// assert(affine.rows() == d+1);

GetPrivateProfileString(common_section, "init_tps", NULL,
f_init_tps, 256, f_config);
if (LoadMatrixFromTxt(f_init_tps, tps) < 0) {
return -1;
}
assert(tps.cols() == d);
assert(tps.rows() == (n - d - 1));
// assert(tps.cols() == d);
// assert(tps.rows() == (n - d - 1));

vnl_matrix<double> param_all;
param_all.set_size(n, d);
Expand Down
62 changes: 38 additions & 24 deletions C++/gmmreg_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

namespace gmmreg {


template<typename T>
T GaussTransform(const vnl_matrix<T>& A,
const vnl_matrix<T>& B,
Expand All @@ -18,8 +19,6 @@ T GaussTransform(const vnl_matrix<T>& A,
vnl_matrix<T>& gradient);

template<typename T>
//T GaussTransform(const vnl_matrix<T>& A, const vnl_matrix<T>& B, T scale, T* gradient);
//T GaussTransform(const T* A, const T* B, int m, int n, int dim, T scale, T* grad);
void ComputeTPSKernel(const vnl_matrix<T>& model,
const vnl_matrix<T>& ctrl_pts,
vnl_matrix<T>& U,
Expand All @@ -37,8 +36,6 @@ void ComputeSquaredDistanceMatrix(const vnl_matrix<T>& A,
const vnl_matrix<T>& B,
vnl_matrix<T>& D);

//void normalize(const vnl_matrix<T>& x, vnl_vector<T>& centroid, T& scale, vnl_matrix<T>& normalized_x);
//void denormalize(const vnl_matrix<T>& x, const vnl_vector<T>& centroid, const T scale, vnl_matrix<T>& denormalized_x);
template<typename T>
void Normalize(vnl_matrix<T>& x,
vnl_vector<T>& centroid,
Expand Down Expand Up @@ -86,13 +83,16 @@ template<typename T>
void GaussianAffinityMatrix(const T* A, const T* B,
int m, int n, int dim, T scale, T* dist) {
scale = -2.0*SQR(scale);
for (int i = 0, k = 0; i < m; ++i) {
for (int j = 0; j < n; ++j, ++k) {
int k = 0;
#pragma omp for
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
T dist_ij = 0;
for (int d = 0; d < dim; ++d) {
dist_ij += SQR(A[i * dim + d] - B[j * dim + d]);
}
dist[k] = exp(dist_ij / scale);
++k;
}
}
}
Expand All @@ -101,11 +101,15 @@ template<typename T>
T GaussTransform(const T* A, const T* B,
int m, int n, int dim, T scale, T* grad) {
T cross_term = 0;

#pragma omp for
for (int i = 0; i < m * dim; ++i) {
grad[i] = 0;
}

scale = SQR(scale);

#pragma omp parallel for reduction(+:cross_term)
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
T dist_ij = 0;
Expand All @@ -118,9 +122,10 @@ T GaussTransform(const T* A, const T* B,
}
cross_term += cost_ij;
}
/* printf("cross_term = %.3f\n", cross_term); */
}

scale *= m * n;
#pragma omp for
for (int i = 0; i < m * dim; ++i) {
grad[i] /= scale;
}
Expand All @@ -145,7 +150,7 @@ T GaussTransform(const vnl_matrix<T>& A,
gradient.data_block());
}

// todo: add one more version when the model is same as ctrl_pts
// TODO: add one more version when the model is same as ctrl_pts
// reference: Landmark-based Image Analysis, Karl Rohr, p195
template<typename T>
void ComputeTPSKernel(const vnl_matrix<T>& model,
Expand All @@ -161,10 +166,10 @@ void ComputeTPSKernel(const vnl_matrix<T>& model,
U.fill(0);
T eps = 1e-006;

vnl_vector<T> v_ij;
#pragma omp for
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
v_ij = model.get_row(i) - ctrl_pts.get_row(j);
vnl_vector<T> v_ij = model.get_row(i) - ctrl_pts.get_row(j);
if (d == 2) {
T r = v_ij.squared_magnitude();
if (r > eps) {
Expand All @@ -176,9 +181,11 @@ void ComputeTPSKernel(const vnl_matrix<T>& model,
}
}
}

#pragma omp for
for (int i = 0; i < n; ++i) {
for (int j = i + 1; j < n; ++j) {
v_ij = ctrl_pts.get_row(i) - ctrl_pts.get_row(j);
vnl_vector<T> v_ij = ctrl_pts.get_row(i) - ctrl_pts.get_row(j);
if (d == 2) {
T r = v_ij.squared_magnitude();
if (r > eps) {
Expand All @@ -191,6 +198,8 @@ void ComputeTPSKernel(const vnl_matrix<T>& model,
}
}
}

#pragma omp for
for (int i = 0; i < n; ++i) {
for (int j = 0; j < i; ++j) {
K(i, j) = K(j, i);
Expand Down Expand Up @@ -242,6 +251,8 @@ void ComputeSquaredDistanceMatrix(const vnl_matrix<T>& A,
//asssert(A.cols()==B.cols());
D.set_size(m,n);
vnl_vector<T> v_ij;

#pragma omp for
for (int i = 0; i < m; ++i) {
for (int j = 0; j < n; ++j) {
v_ij = A.get_row(i) - B.get_row(j);
Expand All @@ -259,10 +270,12 @@ void Normalize(vnl_matrix<T>& x,
centroid.set_size(d);

vnl_vector<T> col;
#pragma omp for
for (int i = 0; i < d; ++i) {
col = x.get_column(i);
centroid(i) = col.mean();
}
#pragma omp for
for (int i = 0; i < n; ++i) {
x.set_row(i, x.get_row(i) - centroid);
}
Expand All @@ -271,56 +284,57 @@ void Normalize(vnl_matrix<T>& x,
}

template<typename T>
void Denormalize(vnl_matrix<T>& x, const vnl_vector<T>& centroid, const T scale) {
void Denormalize(vnl_matrix<T>& x, const vnl_vector<T>& centroid,
const T scale) {
int n = x.rows();
if (n==0) return;
int d = x.cols();
#pragma omp for
for (int i = 0; i < n; ++i) {
x.set_row(i, x.get_row(i) * scale + centroid);
}
}

template<typename T>
void ComputeP(const vnl_matrix<T>& x,const vnl_matrix<T>& y, vnl_matrix<T>& P, T &E, T sigma, int outliers) {
void ComputeP(const vnl_matrix<T>& x, const vnl_matrix<T>& y, vnl_matrix<T>& P,
T &E, T sigma, int outliers) {
T k;
k = -2*sigma*sigma;

//P.set_size(m,n); P.fill(0);
//vnl_vector<T> v_ij;

vnl_vector<T> column_sum;
int m = x.rows();
int s = y.rows();
int d = x.cols();
column_sum.set_size(s);
column_sum.fill(0);
T outlier_term = outliers*pow((2*sigma*sigma*3.1415926),0.5*d);
T outlier_term = outliers * pow((2*sigma*sigma*3.1415926), 0.5*d);
#pragma omp for
for (int i = 0; i < m; ++i) {
for (int j = 0; j < s; ++j) {
T r = 0;
for (int t = 0; t < d; ++t) {
r += (x(i,t) - y(j,t))*(x(i,t) - y(j,t));
r += (x(i, t) - y(j, t)) * (x(i, t) - y(j, t));
}
P(i,j) = exp(r/k);
column_sum[j]+=P(i,j);
column_sum[j] += P(i,j);
}
}


if (outliers!=0) {
if (outliers != 0) {
#pragma omp for
for (int i = 0; i < s; ++i)
column_sum[i] += outlier_term;
}
if (column_sum.min_value()>(1e-12)) {
if (column_sum.min_value( )> (1e-12)) {
E = 0;
#pragma omp for
for (int i = 0; i < s; ++i) {
for (int j = 0; j < m; ++j){
P(j,i) = P(j,i)/column_sum[i];
P(j, i) = P(j, i) / column_sum[i];
}
E -= log(column_sum[i]);
}
//vcl_cerr< < s;
//vcl_cerr<<P.get_column(10);
}
else {
P.empty();
Expand Down

0 comments on commit 36fb490

Please sign in to comment.