Skip to content

Commit

Permalink
actually clamp denorm in RoundToFloat16 when clamp_denorms is set (py…
Browse files Browse the repository at this point in the history
…torch#871)

Summary:
Pull Request resolved: pytorch#871

Pull Request resolved: pytorch#457

As title. This is a follow-up to the discussion in D24801098 (pytorch@c438e6e).

Reviewed By: jianyuh

Differential Revision: D24842171

fbshipit-source-id: 6631eb753015ca49d70aa43a6b8fe9a7e4d59b08
  • Loading branch information
jspark1105 authored and facebook-github-bot committed Jan 18, 2022
1 parent 9843c86 commit c3ad33c
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
17 changes: 15 additions & 2 deletions src/FbgemmFloat16Convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,23 @@ void RoundToFloat16(
bool clamp,
bool clamp_denorms) {
std::vector<fbgemm::float16> data_fp16(size);
// clamp_denorms is always true, since we use FloatToFloat16_simd function
// with _mm256_cvtps_ph.
FloatToFloat16_simd(input, &(data_fp16[0]), size, /*do_clip=*/clamp);
Float16ToFloat_simd(&(data_fp16[0]), output, size);
if (clamp_denorms) {
// FloatToFloat16_simd always preserves fp16 denormals, so we need to flush
// them to zero manually.
union epsilon_t {
float f;
uint32_t i;
};
union epsilon_t epsilon;
epsilon.i = 0x38800000u; // 1 / 16384
for (size_t i = 0; i < size; ++i) {
if (std::abs(output[i]) < epsilon.f) {
output[i] = 0.0;
}
}
}
}

} // namespace fbgemm
4 changes: 3 additions & 1 deletion test/Float16ConvertTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ TEST_P(FBGemmFloat16Test, Conversion_fake_rounding) {
vector<vector<int>> shapes;
random_device r;
default_random_engine generator(r());
uniform_int_distribution<int> dm(2, 1024 * 256);
uniform_int_distribution<int> dm(32, 1024 * 256);

for (int i = 0; i < 10; i++) {
int m = dm(generator);
Expand All @@ -159,6 +159,7 @@ TEST_P(FBGemmFloat16Test, Conversion_fake_rounding) {
if (do_clip) {
A_fp32_ref[0] += 1024 * FP16_MAX;
A_fp32_ref[1] = 1e-10;
A_fp32_ref[2] = 5.5e-8;
}

RoundToFloat16(A_fp32_ref.data(), A_fp32_final.data(), m, do_clip, do_clip);
Expand Down Expand Up @@ -186,6 +187,7 @@ TEST_P(FBGemmFloat16Test, Conversion_fake_rounding) {
}
if (do_clip) {
EXPECT_EQ(A_fp32_final[1], 0.0);
EXPECT_EQ(A_fp32_final[2], 0.0);
}
}
}

0 comments on commit c3ad33c

Please sign in to comment.