forked from pytorch/FBGEMM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
GenerateKernelU8S8S32ACC16Avx512VNNI.cc
50 lines (43 loc) · 1.49 KB
/
GenerateKernelU8S8S32ACC16Avx512VNNI.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <iostream>
#include "./GenerateKernel.h"
namespace fbgemm {
namespace x86 = asmjit::x86;
/**
 * getOrCreate specialization for inst_set_t::avx512_vnni when the
 * accumulation type is int16_t.
 *
 * This combination is not supported. Where assertions are compiled in, the
 * assert below fails on every call. Where they are compiled out (NDEBUG), the
 * request is handed to the int32_t-accumulation code generator instead.
 *
 * All four arguments (accum, mc, nc, kc) are passed through unchanged to the
 * int32_t-accumulation getOrCreate, and its result is returned as-is.
 */
template <>
template <>
CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::jit_micro_kernel_fp
CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<
    inst_set_t::avx512_vnni>(bool accum, int32_t mc, int32_t nc, int32_t kc) {
  // Code generator used as the fallback: same element types, int32_t
  // accumulation.
  using Acc32CodeGen = CodeGenBase<uint8_t, int8_t, int32_t, int32_t>;

  // Reaching this specialization is treated as a programming error.
  assert(0 && "Accumulation to int16_t is not available for VNNI!");

  // With assertions disabled, fall back to int32_t accumulation.
  Acc32CodeGen acc32Gen;
  auto kernel = acc32Gen.getOrCreate<inst_set_t::avx512_vnni>(accum, mc, nc, kc);
  return kernel;
}
/**
 * getOrCreate specialization for inst_set_t::avx512_vnni_ymm when the
 * accumulation type is int16_t.
 *
 * This combination is not supported. Where assertions are compiled in, the
 * assert below fails on every call. Where they are compiled out (NDEBUG), the
 * request is handed to the int32_t-accumulation code generator instead.
 *
 * All four arguments (accum, mc, nc, kc) are passed through unchanged to the
 * int32_t-accumulation getOrCreate, and its result is returned as-is.
 */
template <>
template <>
CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::jit_micro_kernel_fp
CodeGenBase<uint8_t, int8_t, int32_t, int16_t>::getOrCreate<
    inst_set_t::avx512_vnni_ymm>(
    bool accum,
    int32_t mc,
    int32_t nc,
    int32_t kc) {
  // Code generator used as the fallback: same element types, int32_t
  // accumulation.
  using Acc32CodeGen = CodeGenBase<uint8_t, int8_t, int32_t, int32_t>;

  // Reaching this specialization is treated as a programming error.
  assert(0 && "Accumulation to int16_t is not available for VNNI!");

  // With assertions disabled, fall back to int32_t accumulation.
  Acc32CodeGen acc32Gen;
  auto kernel =
      acc32Gen.getOrCreate<inst_set_t::avx512_vnni_ymm>(accum, mc, nc, kc);
  return kernel;
}
} // namespace fbgemm