Skip to content

Commit

Permalink
fbgemm handles block size 1 sparse adagrad (pytorch#306)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#306

Make fbgemm behave the same for block size 1 as the existing Caffe2 code, so we don't need to handle block size 1 separately in the Caffe2 code and can just call fbgemm.
Inside fbgemm, rely on the compiler to generate efficient code for SparseAdaGrad with block size 1 instead of JIT'ing.

Reviewed By: jianyuh

Differential Revision: D19246900

fbshipit-source-id: 5fb3e03d7d9a9a8f7ed884616f5ce20e4e903b0b
  • Loading branch information
jspark1105 authored and facebook-github-bot committed Feb 27, 2020
1 parent e1b1a55 commit 967d4bc
Showing 1 changed file with 64 additions and 0 deletions.
64 changes: 64 additions & 0 deletions src/SparseAdagrad.cc
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,57 @@ GenSparseAdagrad<indxType, instSet>::getOrCreate(
});
} // getOrCreate

// Specialization for block size 1, internally called by GenerateSparseAdaGrad.
// Plain scalar code: for a single element per row, letting the compiler
// optimize this loop is preferred over JIT'ing a kernel.
//
// Returns num_rows on success; if an index is out of range, stops and
// returns the number of rows successfully processed before it.
template <typename IndexType>
int SparseAdaGradBlockSize1_(
    int num_rows, // number of rows reading
    std::uint64_t param_size, // total number of parameters
    float* w, // input/output parameters
    const float* g, // input gradients
    float* h, // input/output momentums
    const IndexType* indices, // indices of each row
    float epsilon,
    float lr,
    bool rowwise) {
  for (int row = 0; row < num_rows; ++row) {
    const IndexType idx = indices[row];
    // Bail out at the first out-of-range index; the return value tells the
    // caller how many rows were applied.
    if (idx >= param_size) {
      return row;
    }

    const float grad = g[row];
    // Accumulate the squared gradient into the momentum slot in place.
    h[idx] += grad * grad;
    const float denom = std::sqrt(h[idx]) + epsilon;
    // The two branches are mathematically equal for block size 1 but keep
    // the exact floating-point evaluation order of the Caffe2 rowwise and
    // elementwise variants, so results match bit-for-bit.
    if (rowwise) {
      w[idx] += lr / denom * grad;
    } else {
      w[idx] += lr * grad / denom;
    }
  }
  return num_rows;
}

// Explicit instantiation for 64-bit indices, so the definition above can
// stay in this translation unit while GenerateSparseAdaGrad links against it.
template int SparseAdaGradBlockSize1_(
    int num_rows, // number of rows reading
    std::uint64_t param_size, // total number of parameters
    float* w, // input/output parameters
    const float* g, // input gradients
    float* h, // input/output momentums
    const std::int64_t* indices, // indices of each row
    float epsilon,
    float lr,
    bool rowwise);

// Explicit instantiation for 32-bit indices (see the 64-bit one above).
template int SparseAdaGradBlockSize1_(
    int num_rows, // number of rows reading
    std::uint64_t param_size, // total number of parameters
    float* w, // input/output parameters
    const float* g, // input gradients
    float* h, // input/output momentums
    const std::int32_t* indices, // indices of each row
    float epsilon,
    float lr,
    bool rowwise);

} // namespace

template <typename IndexType>
Expand All @@ -652,6 +703,19 @@ typename SparseAdaGradSignature<IndexType>::Type GenerateSparseAdaGrad(
}

if (fbgemmHasAvx512Support() || fbgemmHasAvx2Support()) {
if (block_size == 1) {
return [=](int num_rows, // number of rows reading
std::uint64_t param_size, // total number of parameters
float* w, // input/output parameters
const float* g, // input gradients
float* h, // input/output momentums
const IndexType* indices, // indices of each row
float epsilon,
float lr) {
return SparseAdaGradBlockSize1_(
num_rows, param_size, w, g, h, indices, epsilon, lr, rowwise);
};
}
static GenSparseAdagrad<IndexType, inst_set_t::avx2> kernel_generator;
constexpr int VLEN = simd_info<inst_set_t::avx2>::WIDTH_32BIT_ELEMS;
const int* mask_avx2 = &internal::avx2_ps_or_epi32_combined_mask
Expand Down

0 comments on commit 967d4bc

Please sign in to comment.