Skip to content
This repository has been archived by the owner on Jul 18, 2018. It is now read-only.

Commit

Permalink
performance degrading bug fixes and further speed optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
Tong Zhang committed Sep 3, 2017
1 parent 516f7e7 commit a4a115c
Show file tree
Hide file tree
Showing 12 changed files with 445 additions and 69 deletions.
7 changes: 7 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,10 @@
0.3 (Dec 2016)
Fixed several bugs that affect prediction performance (especially for small datasets).

0.4 (Aug 2017)
Fixed a bug that truncated negative float values to numeric_limits<float>::min() (the smallest positive normalized float, not the most negative value), causing degradation in prediction performance for datasets with negative values; changed the truncation bound to numeric_limits<float>::lowest()

0.5 (Sept 2017)
Added OpenMP support and multi-threading for discretization
Added loop unrolling and a compilation option for SIMD optimization

30 changes: 27 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,37 @@ cmake_minimum_required (VERSION 2.8.0)

project (FastRGF)

set(CMAKE_CXX_FLAGS "-O3 -std=c++11")
# whether to use openmp (default is on)
option(OPENMP "Use openmp for multi-threads" ON)

# you may need to use the following for g++-4.8
#set(CMAKE_CXX_FLAGS "-O3 -std=c++11 -pthread")

set(CMAKE_CXX_FLAGS "-O3 -std=c++11")
#set(CMAKE_CXX_FLAGS "-g -std=c++11 -Wall")

# Select the multi-threading backend based on the OPENMP option:
# when it is set, define USE_OMP and compile with -fopenmp; otherwise only
# report that the standard C++11 thread library is used.
if(OPENMP)
message("use openmp for multi-threads")
# use openmp
# USE_OMP is the preprocessor symbol tested by the sources (#ifdef USE_OMP / #ifndef USE_OMP)
add_definitions("-DUSE_OMP")

if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
# mac os x: clang does not support openmp, need to install a g++ with openmp support
# NOTE(review): the compiler path below is hard-coded to g++-7 under /usr/local/bin
# and is assigned after project(); confirm that CMake actually honors it on the
# target machines, or pass -DCMAKE_CXX_COMPILER=... on the command line instead.
set(CMAKE_CXX_COMPILER "/usr/local/bin/g++-7")
# presumably forces the compiler id so that the GNU-specific flag check later in this file matches
set (CMAKE_CXX_COMPILER_ID "GNU")
endif()

# append the OpenMP flag to whatever flags were configured earlier
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
else()
message("use standard c++11 thread library")
endif()

if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -ftree-vectorize -ffast-math")
endif()
message("c++ compiler= " ${CMAKE_CXX_COMPILER})
message("c++ options= " ${CMAKE_CXX_FLAGS})
get_directory_property( cDirDefs DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS )
message("c++ definitions= " ${cDirDefs})

include_directories(include)


Expand Down
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,6 @@ The implementation of greedy tree node optimization employs second order Newton
Please see the file [CHANGES](CHANGES) for version information.
The software is written in C++11 and has been tested under Linux and macOS. It may require g++ version 4.8 or above and CMake version 2.8 or above.

If you use *g++-4.8*, after running the examples, you may get error messages similar to the following:

terminate called after throwing an instance of 'std::system_error'
what(): Enable multithreading to use std::thread: Operation not permitted

If this occurs, you need to add the **-pthread** flag in [CMakeLists.txt](CMakeLists.txt) to the variable CMAKE_CXX_FLAGS in order to enable multi-threading. This problem seems to be a bug in the g++ compiler. There may be variations of this problem specific to your system that require different fixes.

To install the binaries, unpackage the software into a directory.

Expand Down
9 changes: 8 additions & 1 deletion include/header.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,22 @@

#include <cstring>

#ifdef USE_OMP
#include "omp.h"
#endif

namespace rgf {


#define VER "version 0.2 (August 2016) by Tong Zhang"
#define VER "version 0.4 (Aug 2017) by Tong Zhang"


const int max_thrds=128;


const int vect_width=8;


using int_t=long;


Expand Down
14 changes: 14 additions & 0 deletions include/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "header.h"


namespace rgf {

class MyIO {
Expand Down Expand Up @@ -241,13 +242,26 @@ class Timer {
mr.reduce(0);
return;
}
static const bool use_omp=true;

#ifndef USE_OMP
for (tid=0; tid<nthreads; tid++) {
_th[tid]= thread(& MapReduceRunner::single_thread_map_reduce<T>, this,
std::ref(mr),begin, end, tid, nthreads,run_range);
}
#else
omp_set_num_threads(nthreads);
#pragma omp parallel for
for (tid=0; tid<nthreads; tid++) {
single_thread_map_reduce<T>(std::ref(mr),begin,end,tid,nthreads,run_range);
}
#endif

mr.master();
for (tid=0; tid<nthreads; tid++) {
#ifndef USE_OMP
_th[tid].join();
#endif
mr.reduce(tid);
}
}
Expand Down
10 changes: 6 additions & 4 deletions src/base/data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class MyDataInputLineParseResult {

long tmp=atol(token_str);
if (tmp >= numeric_limits<i_t>::max() ||
tmp< numeric_limits<i_t>::min()) {
tmp< numeric_limits<i_t>::lowest()) {
throw MyDataInputException(" : index out of range",lno);
}
result.index=tmp;
Expand All @@ -64,13 +64,14 @@ class MyDataInputLineParseResult {
if (is_same<v_t,float>::value ||is_same<v_t,double>::value) {
double tmp= atof(token_str + (sep+1));
if (tmp>= numeric_limits<float>::max()) tmp=numeric_limits<float>::max();
if (tmp<= numeric_limits<float>::min()) tmp=numeric_limits<float>::min();
if (tmp<= numeric_limits<float>::lowest()) tmp=numeric_limits<float>::lowest();
if (!(tmp == tmp)) tmp=numeric_limits<float>::lowest();
result.value=(float)tmp;
}
else {
tmp=atol(token_str+ (sep+1));
if (tmp >= numeric_limits<v_t>::max() ||
tmp< numeric_limits<v_t>::min()) {
tmp< numeric_limits<v_t>::lowest()) {
throw MyDataInputException(" : value out of range",lno);
}
result.value=tmp;
Expand All @@ -93,7 +94,8 @@ class MyDataInputLineParseResult {

double tmp=stod(token);
if (tmp>= numeric_limits<float>::max()) tmp=numeric_limits<float>::max();
if (tmp<= numeric_limits<float>::min()) tmp=numeric_limits<float>::min();
if (tmp<= numeric_limits<float>::lowest()) tmp=numeric_limits<float>::lowest();
if (!(tmp == tmp)) tmp=numeric_limits<float>::lowest();
feats_dense.push_back((float)tmp);
continue;
}
Expand Down
Loading

0 comments on commit a4a115c

Please sign in to comment.