Skip to content

Commit 463e590

Browse files
committed
fix: upload
1 parent a2f028c commit 463e590

File tree

2 files changed

+7
-6
lines changed

2 files changed

+7
-6
lines changed

src/code/fast/ntt_avx512f.hpp

+6 -6
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ struct NTT_AVX512F {
1717
Montgomery mt;
1818
u32 mod, g;
1919

20-
[[gnu::noinline]] u32 power(u32 base, u32 exp) const {
20+
u32 power(u32 base, u32 exp) const {
2121
const auto mt = this->mt; // ! to put Montgomery constants in registers
2222
u32 res = mt.r;
2323
for (; exp > 0; exp >>= 1) {
@@ -64,7 +64,7 @@ struct NTT_AVX512F {
6464
}
6565
// input data[i] in [0, 2 * mod)
6666
// output data[i] in [0, 4 * mod)
67-
[[gnu::noinline]] __attribute__((optimize("O3"))) void fft(u32 lg, u32 *data) const {
67+
void fft(u32 lg, u32 *data) const {
6868
const auto mt = this->mt; // ! to put Montgomery constants in registers
6969
const auto mts = this->mts; // ! to put Montgomery constants in registers
7070
u32 n = 1 << lg, k = lg;
@@ -133,7 +133,7 @@ struct NTT_AVX512F {
133133
// output data[i] in [0, mod)
134134
// fc (if specified) should be in [0, mod)
135135
// if fc is specified everything is multiplied by fc
136-
[[gnu::noinline]] __attribute__((optimize("O3"))) void ifft(u32 lg, u32 *data, u32 fc = -1u) const {
136+
void ifft(u32 lg, u32 *data, u32 fc = -1u) const {
137137
const auto mt = this->mt; // ! to put Montgomery constants in registers
138138
const auto mts = this->mts; // ! to put Montgomery constants in registers
139139
if (fc == -1u) fc = mt.r;
@@ -192,7 +192,7 @@ struct NTT_AVX512F {
192192
}
193193
}
194194

195-
__attribute__((optimize("O3"))) vec<u32> conv_slow(vec<u32> a, vec<u32> b) const {
195+
vec<u32> conv_slow(vec<u32> a, vec<u32> b) const {
196196
u32 sz = std::max<u32>(0, u32(a.size() + b.size() - 1));
197197
const auto mt = this->mt; // ! to put Montgomery constants in registers
198198
vec<u32> c(sz);
@@ -206,7 +206,7 @@ struct NTT_AVX512F {
206206

207207
// a and b should be 64-byte aligned
208208
// writes (a * b) to a
209-
[[gnu::noinline]] __attribute__((optimize("O3"))) void conv(u32 lg, __restrict__ pu32 a, __restrict__ pu32 b) const {
209+
void conv(u32 lg, __restrict__ pu32 a, __restrict__ pu32 b) const {
210210
if (lg <= 4) {
211211
u32 n = (1 << lg);
212212
__restrict__ pu32 c = (pu32)_mm_malloc(n * 4, 4);
@@ -227,7 +227,7 @@ struct NTT_AVX512F {
227227
ifft(lg, a, mt.r2);
228228
}
229229

230-
__attribute__((optimize("O3"))) vec<u32> conv(vec<u32> const &a, vec<u32> const &b) const {
230+
vec<u32> conv(vec<u32> const &a, vec<u32> const &b) const {
231231
u32 sz = std::max<u32>(0, u32(a.size() + b.size() - 1));
232232
u32 lg = u32(std::__lg(std::max<u32>(1, sz - 1)) + 1);
233233
pu32 ap = (pu32)_mm_malloc((usz)std::max(64, (1 << lg) * 4), 64);

src/code/util/simd_avx512f.hpp

+1
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
#define TIFALIBS_UTIL_SIMD_AVX512F
33

44
#pragma GCC target("avx512f")
5+
#pragma GCC optimize("O3,unroll-loops")
56
#include <immintrin.h>
67

78
#include "util.hpp"

0 commit comments

Comments
 (0)