Skip to content

Commit

Permalink
Fix gpu devices. (dmlc#3693)
Browse files Browse the repository at this point in the history
* Fix gpu_set normalized and unnormalized.
* Fix DeviceSpan.
  • Loading branch information
trivialfis authored and RAMitchell committed Sep 19, 2018
1 parent 0f99cdf commit 9119f9e
Show file tree
Hide file tree
Showing 13 changed files with 199 additions and 138 deletions.
7 changes: 7 additions & 0 deletions src/common/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,11 @@ GlobalRandomEngine& GlobalRandom() {
return RandomThreadLocalStore::Get()->engine;
}
} // namespace common

#ifndef XGBOOST_USE_CUDA
// Definition used when XGBOOST_USE_CUDA is not defined: this variant performs
// no device query and unconditionally reports zero devices.
int AllVisibleImpl::AllVisible() {
  constexpr int kNoDevices = 0;
  return kNoDevices;
}
#endif  // XGBOOST_USE_CUDA

} // namespace xgboost
18 changes: 18 additions & 0 deletions src/common/common.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*!
* Copyright 2018 XGBoost contributors
*/
#include "common.h"

namespace xgboost {

/*!
 * \brief Count the devices reported by cudaGetDeviceCount.
 *
 * The runtime call is wrapped in dh::safe_cuda, which throws when the call
 * does not return cudaSuccess. Any std::exception raised that way is absorbed
 * here and reported as "zero devices" instead of propagating to the caller.
 *
 * \return the device count on success, 0 if the query threw.
 */
int AllVisibleImpl::AllVisible() {
  int device_count = 0;
  try {
    dh::safe_cuda(cudaGetDeviceCount(&device_count));
  } catch (const std::exception&) {
    // Best effort: a failed query is treated the same as having no devices.
    device_count = 0;
  }
  return device_count;
}

} // namespace xgboost
110 changes: 110 additions & 0 deletions src/common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,39 @@
#define XGBOOST_COMMON_COMMON_H_

#include <xgboost/base.h>
#include <xgboost/logging.h>

#include <exception>
#include <limits>
#include <type_traits>
#include <vector>
#include <string>
#include <sstream>

#if defined(__CUDACC__)
#include <thrust/system/cuda/error.h>
#include <thrust/system_error.h>
#endif

namespace dh {
#if defined(__CUDACC__)
/*
 * CUDA error handling.
 *
 * safe_cuda(expr) forwards the value of `expr` to ThrowOnCudaError together
 * with the file name and line number of the call site.
 */
#define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)

/*!
 * \brief Turn a failing CUDA status code into a C++ exception.
 *
 * \param code status value to inspect.
 * \param file source file name recorded in the exception message.
 * \param line source line number recorded in the exception message.
 * \return `code` unchanged when it equals cudaSuccess.
 * \throws thrust::system_error (category thrust::cuda_category()) whose
 *         message is "<file>(<line>)" for any other value of `code`.
 */
inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
                                    int line) {
  if (code == cudaSuccess) {
    return code;
  }
  // Call-site location, formatted as "file(line)".
  const std::string location =
      std::string{file} + "(" + std::to_string(line) + ")";  // NOLINT
  throw thrust::system_error(code, thrust::cuda_category(), location);
}
#endif
} // namespace dh

namespace xgboost {
namespace common {
/*!
Expand Down Expand Up @@ -103,5 +131,87 @@ class Range {
};

} // namespace common
/*!
 * \brief Holder for the device-count query used by GPUSet::AllVisible().
 *
 * Only the declaration lives in this header; the function body is supplied by
 * a separate translation unit (a CUDA one that queries the runtime, or a
 * non-CUDA one guarded by !defined(XGBOOST_USE_CUDA) that returns 0).
 */
struct AllVisibleImpl {
/*! \brief Number of visible devices; 0 when none can be queried. */
static int AllVisible();
};
/*! \brief Set of devices across which HostDeviceVector can be distributed.
 *
 * The set is stored as a half-open range of consecutive device ordinals
 * [first, first + ndevices). The representation is an implementation detail
 * and could later be replaced by something else, e.g. a bitset.
 */
class GPUSet {
 public:
  /*! \brief Build the range [start, start + ndevices); empty by default. */
  explicit GPUSet(int start = 0, int ndevices = 0)
      : devices_(start, start + ndevices) {}

  /*! \brief A set that contains no device. */
  static GPUSet Empty() { return GPUSet(); }

  /*! \brief `ndevices` devices starting at `start`; a non-positive count
   *  yields the empty set. */
  static GPUSet Range(int start, int ndevices) {
    if (ndevices <= 0) {
      return Empty();
    }
    return GPUSet{start, ndevices};
  }

  /*! \brief Devices counted from 0, capped by both arguments.
   *
   * `ndevices` and `num_rows` are upper bounds. A negative `ndevices`, or one
   * larger than the number of visible devices, means "all visible devices".
   */
  static GPUSet All(int ndevices, int num_rows = std::numeric_limits<int>::max()) {
    const int visible = AllVisible().Size();
    int count = (ndevices < 0 || ndevices > visible) ? visible : ndevices;
    // Never use more devices than there are rows.
    if (count > num_rows) {
      count = num_rows;
    }
    return Range(0, count);
  }

  /*! \brief Every device reported by AllVisibleImpl::AllVisible(), from 0. */
  static GPUSet AllVisible() {
    return Range(0, AllVisibleImpl::AllVisible());
  }

  /*! \brief Map an arbitrary gpu_id onto a visible device ordinal.
   *
   * The absolute value of `gpu_id` is wrapped modulo the number of visible
   * devices, so callers need not know how many devices exist. Fails a CHECK
   * when no device is visible.
   */
  static int GetDeviceIdx(int gpu_id) {
    auto devices = AllVisible();
    CHECK(!devices.IsEmpty()) << "Empty device.";
    return std::abs(gpu_id) % devices.Size();
  }

  /*! \brief Set of the same size whose first ordinal is `gpu_id`. */
  GPUSet Normalised(int gpu_id) const {
    return Range(gpu_id, Size());
  }

  /*! \brief Set of the same size whose first ordinal is 0. */
  GPUSet Unnormalised() const {
    return Range(0, Size());
  }

  /*! \brief Number of devices in the set; never negative. */
  int Size() const {
    const int extent = *devices_.end() - *devices_.begin();
    return extent > 0 ? extent : 0;
  }

  /*! \brief Device ordinal at position `index` (first ordinal + index). */
  int operator[](int index) const {
    CHECK(index >= 0 && index < Size());
    const int first = *devices_.begin();
    return first + index;
  }

  /*! \brief True when the set holds no device. */
  bool IsEmpty() const { return Size() == 0; }

  /*! \brief Position of `device` inside the set (device - first ordinal). */
  int Index(int device) const {
    CHECK(Contains(device));
    const int first = *devices_.begin();
    return device - first;
  }

  /*! \brief Whether `device` lies in [first, last). */
  bool Contains(int device) const {
    const int first = *devices_.begin();
    const int last = *devices_.end();
    return first <= device && device < last;
  }

  common::Range::Iterator begin() const { return devices_.begin(); }  // NOLINT
  common::Range::Iterator end() const { return devices_.end(); }  // NOLINT

  /*! \brief Two sets are equal when their underlying ranges compare equal. */
  friend bool operator==(const GPUSet& lhs, const GPUSet& rhs) {
    return lhs.devices_ == rhs.devices_;
  }
  friend bool operator!=(const GPUSet& lhs, const GPUSet& rhs) {
    return !(lhs.devices_ == rhs.devices_);
  }

 private:
  /*! \brief Range of device ordinals that backs the set. */
  common::Range devices_;
};

} // namespace xgboost
#endif // XGBOOST_COMMON_COMMON_H_
1 change: 0 additions & 1 deletion src/common/device_helpers.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
#include <xgboost/logging.h>

#include "common.h"
#include "gpu_set.h"

#include <algorithm>
#include <chrono>
Expand Down
122 changes: 0 additions & 122 deletions src/common/gpu_set.h

This file was deleted.

4 changes: 2 additions & 2 deletions src/common/host_device_vector.cu
Original file line number Diff line number Diff line change
Expand Up @@ -230,15 +230,15 @@ struct HostDeviceVectorImpl {
CHECK(devices.Contains(device));
LazySyncDevice(device, GPUAccess::kWrite);
return {shards_[devices.Index(device)].data_.data().get(),
static_cast<typename common::Span<T>::index_type>(Size())};
static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
}

common::Span<const T> ConstDeviceSpan(int device) {
GPUSet devices = distribution_.devices_;
CHECK(devices.Contains(device));
LazySyncDevice(device, GPUAccess::kRead);
return {shards_[devices.Index(device)].data_.data().get(),
static_cast<typename common::Span<const T>::index_type>(Size())};
static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
}

size_t DeviceSize(int device) {
Expand Down
12 changes: 6 additions & 6 deletions src/common/host_device_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* Initialization/Allocation:<br/>
* One can choose to initialize the vector on CPU or GPU during constructor.
* (use the 'devices' argument) Or, can choose to use the 'Resize' method to
* allocate/resize memory explicitly, and use the 'Reshard' method
* allocate/resize memory explicitly, and use the 'Reshard' method
* to specify the devices.
*
* Accessing underlying data:<br/>
Expand All @@ -31,7 +31,7 @@
* DevicePointer but data on CPU --> this causes a cudaMemcpy to be issued internally.
* subsequent calls to DevicePointer, will NOT incur this penalty.
* (assuming 'HostVector' is not called in between)
* DevicePointer and data on GPU --> no problems, the device ptr
* DevicePointer and data on GPU --> no problems, the device ptr
* will be returned immediately.
*
* What if xgboost is compiled without CUDA?<br/>
Expand All @@ -40,13 +40,13 @@
*
* Why not consider CUDA unified memory?<br/>
* We did consider. However, it poses complications if we need to support both
* compiling with and without CUDA toolkit. It was easier to have
* compiling with and without CUDA toolkit. It was easier to have
* 'HostDeviceVector' with a special-case implementation in host_device_vector.cc
*
* @note: Size and Devices methods are thread-safe.
* DevicePointer, DeviceStart, DeviceSize, tbegin and tend methods are thread-safe
* DevicePointer, DeviceStart, DeviceSize, tbegin and tend methods are thread-safe
* if different threads call these methods with different values of the device argument.
* All other methods are not thread safe.
* All other methods are not thread safe.
*/

#ifndef XGBOOST_COMMON_HOST_DEVICE_VECTOR_H_
Expand All @@ -59,7 +59,7 @@
#include <initializer_list>
#include <vector>

#include "gpu_set.h"
#include "common.h"
#include "span.h"

// only include thrust-related files if host_device_vector.h
Expand Down
2 changes: 1 addition & 1 deletion src/common/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include <map>
#include <string>

#include "gpu_set.h"
#include "common.h"

namespace xgboost {
namespace common {
Expand Down
2 changes: 1 addition & 1 deletion src/linear/updater_gpu_coordinate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <thrust/execution_policy.h>
#include <thrust/inner_product.h>
#include <xgboost/linear_updater.h>
#include "../common/gpu_set.h"
#include "../common/common.h"
#include "../common/device_helpers.cuh"
#include "../common/timer.h"
#include "coordinate_common.h"
Expand Down
2 changes: 1 addition & 1 deletion src/predictor/gpu_predictor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include <xgboost/tree_model.h>
#include <xgboost/tree_updater.h>
#include <memory>
#include "../common/gpu_set.h"
#include "../common/common.h"
#include "../common/device_helpers.cuh"
#include "../common/host_device_vector.h"

Expand Down
2 changes: 1 addition & 1 deletion src/tree/updater_gpu.cu
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <xgboost/tree_updater.h>
#include <utility>
#include <vector>
#include "../common/gpu_set.h"
#include "../common/common.h"
#include "param.h"
#include "updater_gpu_common.cuh"

Expand Down
Loading

0 comments on commit 9119f9e

Please sign in to comment.