[R package] GPU support (dmlc#2732)

* [R] MSVC compatibility
* [GPU] allow seed in BernoulliRng up to size_t and scale to uint32_t
* R package build with cmake and CUDA
* R package CUDA build fixes and cleanups
* always export the R package native initialization routine on windows
* update the install instructions doc
* fix lint
* use static_cast directly to set BernoulliRng seed
* [R] demo for GPU accelerated algorithm
* tidy up the R package cmake stuff
* R pack cmake: installs main dependency packages if needed
* [R] version bump in DESCRIPTION
* update NEWS
* added short missing/sparse values explanations to FAQ
sagunb · Sep 28, 2017 · 74db975 · 74db975
1 parent 5c9f01d
commit 74db975
Show file tree

Hide file tree

Showing 14 changed files with 392 additions and 28 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,6 +1,7 @@
 cmake_minimum_required (VERSION 3.2)
 project(xgboost)
 include(cmake/Utils.cmake)
+list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")
 find_package(OpenMP)
 
 set_default_configuration_release()
@@ -10,6 +11,7 @@ msvc_use_static_runtime()
 option(USE_CUDA  "Build with GPU acceleration") 
 option(JVM_BINDINGS "Build JVM bindings" OFF)
 option(GOOGLE_TEST "Build google tests" OFF)
+option(R_LIB "Build shared library for R package" OFF)
 set(GPU_COMPUTE_VER 35;50;52;60;61 CACHE STRING
   "Space separated list of compute versions to be built against")
 
@@ -34,6 +36,19 @@ else()
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funroll-loops")
 endif()
 
+# Preprocessor definitions passed to the compiler when building for the R package (R_LIB=ON)
+if(R_LIB)
+  add_definitions(  # directory-scoped: applies to targets here and in subdirectories added later
+    -DXGBOOST_STRICT_R_MODE=1
+    -DXGBOOST_CUSTOMIZE_GLOBAL_PRNG=1
+    -DDMLC_LOG_BEFORE_THROW=0
+    -DDMLC_DISABLE_STDIN=1
+    -DDMLC_LOG_CUSTOMIZE=1
+    -DRABIT_CUSTOMIZE_MSG_
+    -DRABIT_STRICT_CXX98_
+  )
+endif()
+
 include_directories (
     ${PROJECT_SOURCE_DIR}/include
     ${PROJECT_SOURCE_DIR}/dmlc-core/include
@@ -66,7 +81,7 @@ set(RABIT_EMPTY_SOURCES
     rabit/src/engine_empty.cc
     rabit/src/c_api.cc
 )
-if(MINGW)
+if(MINGW OR R_LIB)
   # build a dummy rabit library
   add_library(rabit STATIC ${RABIT_EMPTY_SOURCES})
 else()
@@ -78,6 +93,7 @@ endif()
 add_subdirectory(dmlc-core)
 set(LINK_LIBRARIES dmlccore rabit)
 
+
 if(USE_CUDA)
   find_package(CUDA 7.5 REQUIRED)
   cmake_minimum_required(VERSION 3.5)
@@ -102,27 +118,65 @@ if(USE_CUDA)
   list(APPEND LINK_LIBRARIES gpuxgboost) 
 endif()
 
+
+# R-package glue sources: appended to SOURCES so they are compiled with the core library code
+if(R_LIB)
+  file(GLOB_RECURSE R_SOURCES  # evaluated at configure time; re-run cmake after adding files
+    R-package/src/*.h
+    R-package/src/*.c
+    R-package/src/*.cc
+  )
+  list(APPEND SOURCES ${R_SOURCES})
+endif()
+
 add_library(objxgboost OBJECT ${SOURCES})
 
-# Executable
-add_executable(runxgboost $<TARGET_OBJECTS:objxgboost> src/cli_main.cc)
-set_target_properties(runxgboost PROPERTIES 
-  OUTPUT_NAME xgboost 
-)
-set_output_directory(runxgboost ${PROJECT_SOURCE_DIR})
-target_link_libraries(runxgboost ${LINK_LIBRARIES})
-
-# Shared library
-add_library(xgboost SHARED $<TARGET_OBJECTS:objxgboost>)
-target_link_libraries(xgboost ${LINK_LIBRARIES})
-set_output_directory(xgboost ${PROJECT_SOURCE_DIR}/lib)
-if(MINGW)
-  # remove the 'lib' prefix to conform to windows convention for shared library names
+
+# R package build: a single shared library 'xgboost' that is also linked against libR
+if(R_LIB)
+  find_package(LibR REQUIRED)  # expected to define LIBR_CORE_LIBRARY and LIBR_INCLUDE_DIRS
+
+  list(APPEND LINK_LIBRARIES "${LIBR_CORE_LIBRARY}")
+  MESSAGE(STATUS "LIBR_CORE_LIBRARY " ${LIBR_CORE_LIBRARY})
+
+  include_directories(
+    "${LIBR_INCLUDE_DIRS}"
+    "${PROJECT_SOURCE_DIR}"
+  )
+
+  # Shared library target for the R package
+  add_library(xgboost SHARED $<TARGET_OBJECTS:objxgboost>)
+  target_link_libraries(xgboost ${LINK_LIBRARIES})
+  # R uses no lib prefix in shared library names of its packages
   set_target_properties(xgboost PROPERTIES PREFIX "")
+
+  setup_rpackage_install_target(xgboost ${CMAKE_CURRENT_BINARY_DIR})
+  # point the install prefix at a throw-away directory in the build tree for any remaining installs
+  set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/dummy_inst")
+
+# regular (non-R) build: command-line executable and shared library
+else()
+  # Executable
+  add_executable(runxgboost $<TARGET_OBJECTS:objxgboost> src/cli_main.cc)
+  set_target_properties(runxgboost PROPERTIES
+    OUTPUT_NAME xgboost
+  )
+  set_output_directory(runxgboost ${PROJECT_SOURCE_DIR})
+  target_link_libraries(runxgboost ${LINK_LIBRARIES})
+
+  # Shared library
+  add_library(xgboost SHARED $<TARGET_OBJECTS:objxgboost>)
+  target_link_libraries(xgboost ${LINK_LIBRARIES})
+  set_output_directory(xgboost ${PROJECT_SOURCE_DIR}/lib)
+  if(MINGW)
+    # remove the 'lib' prefix to conform to windows convention for shared library names
+    set_target_properties(xgboost PROPERTIES PREFIX "")
+  endif()
+
+  # Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names
+  add_dependencies(xgboost runxgboost)
 endif()
 
-#Ensure these two targets do not build simultaneously, as they produce outputs with conflicting names
-add_dependencies(xgboost runxgboost)
 
 # JVM
 if(JVM_BINDINGS)
@@ -139,6 +193,7 @@ if(JVM_BINDINGS)
         ${JAVA_JVM_LIBRARY})
 endif()
 
+
 # Test
 if(GOOGLE_TEST)
   enable_testing()
@@ -162,5 +217,6 @@ if(GOOGLE_TEST)
   add_test(TestXGBoost testxgboost)
 endif()
 
+
 # Group sources
 auto_source_group("${SOURCES}")
diff --git a/NEWS.md b/NEWS.md
@@ -15,15 +15,19 @@ This file records the changes in xgboost library in reverse chronological order.
   - Thread local variable is upgraded so it is automatically freed at thread exit.
 * Migrate to C++11
   - The current master version now requires C++11 enabled compiled(g++4.8 or higher)
+* Predictor interface was factored out (in a manner similar to the updater interface).
 * New functionality
   - Ability to adjust tree model's statistics to a new dataset without changing tree structures.
   - Extracting feature contributions to individual predictions.
+  - Faster, histogram-based tree algorithm (`tree_method='hist'`).
+  - GPU/CUDA accelerated tree algorithms (`tree_method='gpu_hist'` or `'gpu_exact'`), including the GPU-based predictor.
 * R package:
   - New parameters:
     - `silent` in `xgb.DMatrix()`
     - `use_int_id` in `xgb.model.dt.tree()`
     - `predcontrib` in `predict()`
   - Default value of the `save_period` parameter in `xgboost()` changed to NULL (consistent with `xgb.train()`).
+  - It's possible to custom-build the R package with GPU acceleration support.
 
 ## v0.6 (2016.07.29)
 * Version 0.5 is skipped due to major improvements in the core

diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: xgboost
 Type: Package
 Title: Extreme Gradient Boosting
-Version: 0.6.4.6
-Date: 2017-01-04
+Version: 0.6.4.7
+Date: 2017-09-25
 Author: Tianqi Chen <[email protected]>, Tong He <[email protected]>,
     Michael Benesty <[email protected]>, Vadim Khotilovich <[email protected]>,
     Yuan Tang <[email protected]>

diff --git a/R-package/demo/00Index b/R-package/demo/00Index
@@ -10,3 +10,4 @@ predict_leaf_indices            Predicting the corresponding leaves
 early_stopping                  Early Stop in training
 poisson_regression              Poisson Regression on count data
 tweedie_regression              Tweddie Regression
+gpu_accelerated                 GPU-accelerated tree building algorithms
diff --git a/R-package/demo/README.md b/R-package/demo/README.md
@@ -8,6 +8,7 @@ XGBoost R Feature Walkthrough
 * [Generalized Linear Model](generalized_linear_model.R)
 * [Cross validation](cross_validation.R)
 * [Create a sparse matrix from a dense one](create_sparse_matrix.R)
+* [Use GPU-accelerated tree building algorithms](gpu_accelerated.R)
 
 Benchmarks
 ====

diff --git a/R-package/demo/gpu_accelerated.R b/R-package/demo/gpu_accelerated.R
@@ -0,0 +1,45 @@
+# An example of using the GPU-accelerated tree building algorithms
+#
+# NOTE: it can only run if you have a CUDA-enabled GPU and the package was
+#       specially compiled with GPU support.
+#
+# For the current functionality, see
+# https://xgboost.readthedocs.io/en/latest/gpu/index.html
+#
+
+library('xgboost')
+
+# Simulate an N x p uniform random matrix and a binary response that depends on pp of its columns
+set.seed(111)
+N <- 1000000  # number of rows
+p <- 50       # number of columns
+pp <- 25      # number of columns that influence the response
+X <- matrix(runif(N * p), ncol = p)
+betas <- 2 * runif(pp) - 1  # coefficients drawn uniformly from (-1, 1)
+sel <- sort(sample(p, pp))  # indices of the pp columns that drive the response
+m <- X[, sel] %*% betas - 1 + rnorm(N)  # linear predictor: intercept -1 plus standard normal noise
+y <- rbinom(N, 1, plogis(m))  # 0/1 draws with success probability plogis(m)
+
+tr <- sample.int(N, N * 0.75)  # row indices of the training set (75% of the rows)
+dtrain <- xgb.DMatrix(X[tr,], label = y[tr])
+dtest <- xgb.DMatrix(X[-tr,], label = y[-tr])  # the remaining rows form the test set
+wl <- list(train = dtrain, test = dtest)  # passed to xgb.train() as the watchlist
+
+# An example of running the 'gpu_hist' algorithm,
+# which is
+# - similar to the 'hist' algorithm
+# - the fastest option for moderately large datasets
+# - current limitations: max_depth < 16, does not implement guided loss
+# You can use tree_method = 'gpu_exact' for another GPU-accelerated algorithm,
+# which is slower and more memory-hungry, but does not use binning.
+param <- list(objective = 'reg:logistic', eval_metric = 'auc', subsample = 0.5, nthread = 4,
+              max_bin = 64, tree_method = 'gpu_hist')
+pt <- proc.time()
+bst_gpu <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+proc.time() - pt  # time taken by the 'gpu_hist' training run
+
+# Compare to the 'hist' algorithm, using otherwise identical parameters:
+param$tree_method <- 'hist'
+pt <- proc.time()
+bst_hist <- xgb.train(param, dtrain, watchlist = wl, nrounds = 50)
+proc.time() - pt  # time taken by the 'hist' training run
diff --git a/R-package/demo/runall.R b/R-package/demo/runall.R
@@ -10,4 +10,5 @@ demo(predict_leaf_indices)
 demo(early_stopping)
 demo(poisson_regression)
 demo(caret_wrapper)
-demo(tweedie_regression)
+demo(tweedie_regression)
+#demo(gpu_accelerated) # can only run when built with GPU support
diff --git a/R-package/src/init.c b/R-package/src/init.c
@@ -68,6 +68,9 @@ static const R_CallMethodDef CallEntries[] = {
   {NULL, NULL, 0}
 };
 
+#if defined(_WIN32)
+__declspec(dllexport)
+#endif
 void R_init_xgboost(DllInfo *dll) {
   R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
   R_useDynamicSymbols(dll, FALSE);

diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc
@@ -112,7 +112,7 @@ SEXP XGDMatrixCreateFromCSC_R(SEXP indptr,
     col_ptr_[i] = static_cast<size_t>(p_indptr[i]);
   }
   #pragma omp parallel for schedule(static)
-  for (size_t i = 0; i < ndata; ++i) {
+  for (int64_t i = 0; i < static_cast<int64_t>(ndata); ++i) {
     indices_[i] = static_cast<unsigned>(p_indices[i]);
     data_[i] = static_cast<float>(p_data[i]);
   }

diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
@@ -59,4 +59,28 @@ function(format_gencode_flags flags out)
     set(${out} "${${out}}-gencode arch=compute_${ver},code=sm_${ver};")
   endforeach()
   set(${out} "${${out}}" PARENT_SCOPE)
-endfunction(format_gencode_flags flags)
+endfunction(format_gencode_flags flags)
+
+# Registers install-time steps for rlib_target: stages a copy of R-package (meant to omit src/ and
+# configure) plus the built library under build_dir, installs missing R dependency packages,
+# and runs R CMD INSTALL on the staged copy. Uses LIBR_EXECUTABLE as the R binary.
+function(setup_rpackage_install_target rlib_target build_dir)
+  install(CODE "file(REMOVE_RECURSE \"${build_dir}/R-package\")")  # start from a clean staging dir
+  install(
+    DIRECTORY "${PROJECT_SOURCE_DIR}/R-package"
+    DESTINATION "${build_dir}"
+    REGEX "src/*" EXCLUDE  # NOTE(review): REGEX matches anywhere in the full path, so a checkout under a dir containing "src" may exclude everything - confirm
+    REGEX "R-package/configure" EXCLUDE
+  )
+  install(TARGETS ${rlib_target}  # the built library is placed in the staged package's src/ directory
+    LIBRARY DESTINATION "${build_dir}/R-package/src/"
+    RUNTIME DESTINATION "${build_dir}/R-package/src/")
+  install(CODE "file(WRITE \"${build_dir}/R-package/src/Makevars\" \"all:\")")  # make rule without a recipe; presumably keeps R CMD INSTALL from compiling anything
+  install(CODE "file(WRITE \"${build_dir}/R-package/src/Makevars.win\" \"all:\")")  # same, for Windows
+  set(XGB_DEPS_SCRIPT  # R code: installs whichever listed packages are missing; NOTE(review): confirm the list ('statar'?)
+    "deps = setdiff(c('statar','data.table', 'magrittr', 'stringi'), rownames(installed.packages()));\
+    if(length(deps)>0) install.packages(deps, repo = 'https://cloud.r-project.org/')")
+  install(CODE "execute_process(COMMAND \"${LIBR_EXECUTABLE}\" \"-q\" \"-e\" \"${XGB_DEPS_SCRIPT}\")")
+  install(CODE "execute_process(COMMAND \"${LIBR_EXECUTABLE}\" CMD INSTALL\
+    \"--no-multiarch\" \"${build_dir}/R-package\")")
+endfunction(setup_rpackage_install_target)