forked from pytorch/FBGEMM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFbgemmFloat16Convert.cc
84 lines (74 loc) · 2 KB
/
FbgemmFloat16Convert.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/*
* Copyright (c) Facebook, Inc. and its affiliates.
* All rights reserved.
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#define FBGEMM_EXPORTS
#include "fbgemm/FbgemmConvert.h"
#include "./RefImplementations.h"
#ifdef USE_MKL
#include <mkl.h>
#endif
#ifdef USE_BLAS
#if __APPLE__
// Not sure whether we need to differentiate between TARGET_OS_MAC,
// TARGET_OS_IPHONE, etc.
#include <Accelerate/Accelerate.h>
#else
#include <cblas.h>
#endif
#endif
#include <cpuinfo.h>
#include <memory>
#include <utility>
#include <vector>
using namespace std;
namespace fbgemm {
// Converts `size` elements from `src` (float) into `dst` (float16), choosing
// the conversion routine at run time.
//
// Dispatch order:
//   1. FloatToFloat16_avx512 when fbgemmHasAvx512Support() is true,
//   2. FloatToFloat16_avx2   when fbgemmHasAvx2Support() is true,
//   3. FloatToFloat16_ref    otherwise.
//
// `do_clip` is passed through unchanged to whichever routine is selected.
//
// Throws std::runtime_error when cpuinfo_initialize() reports failure; in that
// case no conversion routine is called.
void FloatToFloat16_simd(
    const float* src,
    float16* dst,
    size_t size,
    bool do_clip) {
  // CPU feature queries are only attempted once cpuinfo has been initialized.
  if (!cpuinfo_initialize()) {
    throw std::runtime_error("Failed to initialize cpuinfo!");
  }

  // Run time CPU detection: AVX-512 is checked before AVX2.
  if (fbgemmHasAvx512Support()) {
    FloatToFloat16_avx512(src, dst, size, do_clip);
    return;
  }
  if (fbgemmHasAvx2Support()) {
    FloatToFloat16_avx2(src, dst, size, do_clip);
    return;
  }

  // Neither SIMD path is available: use the reference implementation.
  FloatToFloat16_ref(src, dst, size, do_clip);
}
// Converts `size` elements from `src` (float16) into `dst` (float), choosing
// the conversion routine at run time.
//
// Dispatch order:
//   1. Float16ToFloat_avx512 when fbgemmHasAvx512Support() is true,
//   2. Float16ToFloat_avx2   when fbgemmHasAvx2Support() is true,
//   3. Float16ToFloat_ref    otherwise.
//
// Throws std::runtime_error when cpuinfo_initialize() reports failure; in that
// case no conversion routine is called.
void Float16ToFloat_simd(const float16* src, float* dst, size_t size) {
  // CPU feature queries are only attempted once cpuinfo has been initialized.
  if (!cpuinfo_initialize()) {
    throw std::runtime_error("Failed to initialize cpuinfo!");
  }

  // Run time CPU detection: AVX-512 is checked before AVX2.
  if (fbgemmHasAvx512Support()) {
    Float16ToFloat_avx512(src, dst, size);
    return;
  }
  if (fbgemmHasAvx2Support()) {
    Float16ToFloat_avx2(src, dst, size);
    return;
  }

  // Neither SIMD path is available: use the reference implementation.
  Float16ToFloat_ref(src, dst, size);
}
void RoundToFloat16(
const float* input,
float* output,
size_t size,
bool clamp,
bool clamp_denorms) {
std::vector<fbgemm::float16> data_fp16(size);
// clamp_denorms is always true, since we use FloatToFloat16_simd function
// with _mm256_cvtps_ph.
FloatToFloat16_simd(input, &(data_fp16[0]), size, /*do_clip=*/clamp);
Float16ToFloat_simd(&(data_fp16[0]), output, size);
}
} // namespace fbgemm