forked from pytorch/FBGEMM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFbgemmBfloat16Convert.cc
81 lines (72 loc) · 1.9 KB
/
FbgemmBfloat16Convert.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#define FBGEMM_EXPORTS
#include "fbgemm/FbgemmConvert.h"
#include "./RefImplementations.h"
#ifdef USE_MKL
#include <mkl.h>
#endif
#ifdef USE_BLAS
#if __APPLE__
// Not sure whether we need to differentiate between TARGET_OS_MAC,
// TARGET_OS_IPHONE, etc.
#include <Accelerate/Accelerate.h>
#else
#include <cblas.h>
#endif
#endif
#include <cpuinfo.h>
#include <memory>
#include <utility>
#include <vector>
#ifdef FBGEMM_MEASURE_TIME_BREAKDOWN
// Global-namespace variables that exist only when
// FBGEMM_MEASURE_TIME_BREAKDOWN is defined; each one starts at 0.0. Nothing in
// the visible part of this file reads or writes them.
// NOTE(review): the names suggest per-phase timing accumulators for a naive
// bf16 code path (allocation, A/B/C conversions, compute, total run) that are
// updated elsewhere; the units are not shown here -- confirm against the users.
double naive_malloc_time = 0.0;
double naive_A_bf16_to_fp32_time = 0.0;
double naive_B_bf16_to_fp32_time = 0.0;
double naive_C_bf16_to_fp32_time = 0.0;
double naive_computing_time = 0.0;
double naive_C_fp32_to_bf16_time = 0.0;
double naive_run_time = 0.0;
#endif
namespace fbgemm {
// Converts `size` elements from `src` (float) into `dst` (bfloat16) by
// forwarding to one of the FloatToBfloat16_* implementations, chosen at run
// time:
//   1. FloatToBfloat16_avx512 when fbgemmHasAvx512Support() is true (this
//      check is compiled out when __aarch64__ is defined),
//   2. otherwise FloatToBfloat16_avx2 when fbgemmHasAvx2Support() is true,
//   3. otherwise FloatToBfloat16_ref.
// Throws std::runtime_error if cpuinfo_initialize() fails; no conversion is
// attempted in that case.
void FloatToBfloat16_simd(const float* src, bfloat16* dst, size_t size) {
  // The feature queries below are only made after cpuinfo has initialized.
  if (!cpuinfo_initialize()) {
    throw std::runtime_error("Failed to initialize cpuinfo!");
  }

#ifndef __aarch64__
  if (fbgemmHasAvx512Support()) {
    FloatToBfloat16_avx512(src, dst, size);
    return;
  }
#endif

  if (fbgemmHasAvx2Support()) {
    FloatToBfloat16_avx2(src, dst, size);
    return;
  }

  // No matching SIMD path was reported: use the reference implementation.
  FloatToBfloat16_ref(src, dst, size);
}
// Converts `size` elements from `src` (bfloat16) into `dst` (float) by
// forwarding to one of the Bfloat16ToFloat_* implementations, chosen at run
// time:
//   1. Bfloat16ToFloat_avx512 when fbgemmHasAvx512Support() is true (this
//      check is compiled out when __aarch64__ is defined),
//   2. otherwise Bfloat16ToFloat_avx2 when fbgemmHasAvx2Support() is true,
//   3. otherwise Bfloat16ToFloat_ref.
// Throws std::runtime_error if cpuinfo_initialize() fails; no conversion is
// attempted in that case.
void Bfloat16ToFloat_simd(const bfloat16* src, float* dst, size_t size) {
  // The feature queries below are only made after cpuinfo has initialized.
  if (!cpuinfo_initialize()) {
    throw std::runtime_error("Failed to initialize cpuinfo!");
  }

#ifndef __aarch64__
  if (fbgemmHasAvx512Support()) {
    Bfloat16ToFloat_avx512(src, dst, size);
    return;
  }
#endif

  if (fbgemmHasAvx2Support()) {
    Bfloat16ToFloat_avx2(src, dst, size);
    return;
  }

  // No matching SIMD path was reported: use the reference implementation.
  Bfloat16ToFloat_ref(src, dst, size);
}
} // namespace fbgemm