@@ -17,7 +17,7 @@ struct NTT_AVX512F {
17
17
Montgomery mt;
18
18
u32 mod, g;
19
19
20
- [[gnu::noinline]] u32 power (u32 base, u32 exp) const {
20
+ u32 power (u32 base, u32 exp) const {
21
21
const auto mt = this ->mt ; // ! to put Montgomery constants in registers
22
22
u32 res = mt.r ;
23
23
for (; exp > 0 ; exp >>= 1 ) {
@@ -64,7 +64,7 @@ struct NTT_AVX512F {
64
64
}
65
65
// input data[i] in [0, 2 * mod)
66
66
// output data[i] in [0, 4 * mod)
67
- [[gnu::noinline]] __attribute__((optimize( " O3 " ))) void fft (u32 lg, u32 *data) const {
67
+ void fft (u32 lg, u32 *data) const {
68
68
const auto mt = this ->mt ; // ! to put Montgomery constants in registers
69
69
const auto mts = this ->mts ; // ! to put Montgomery constants in registers
70
70
u32 n = 1 << lg, k = lg;
@@ -133,7 +133,7 @@ struct NTT_AVX512F {
133
133
// output data[i] in [0, mod)
134
134
// fc (if specified) should be in [0, mod)
135
135
// if fc is specified everything is multiplied by fc
136
- [[gnu::noinline]] __attribute__((optimize( " O3 " ))) void ifft (u32 lg, u32 *data, u32 fc = -1u ) const {
136
+ void ifft (u32 lg, u32 *data, u32 fc = -1u ) const {
137
137
const auto mt = this ->mt ; // ! to put Montgomery constants in registers
138
138
const auto mts = this ->mts ; // ! to put Montgomery constants in registers
139
139
if (fc == -1u ) fc = mt.r ;
@@ -192,7 +192,7 @@ struct NTT_AVX512F {
192
192
}
193
193
}
194
194
195
- __attribute__ ((optimize( " O3 " ))) vec<u32> conv_slow (vec<u32> a, vec<u32> b) const {
195
+ vec<u32> conv_slow (vec<u32> a, vec<u32> b) const {
196
196
u32 sz = std::max<u32>(0 , u32 (a.size () + b.size () - 1 ));
197
197
const auto mt = this ->mt ; // ! to put Montgomery constants in registers
198
198
vec<u32> c (sz);
@@ -206,7 +206,7 @@ struct NTT_AVX512F {
206
206
207
207
// a and b should be 64-byte aligned
208
208
// writes (a * b) to a
209
- [[gnu::noinline]] __attribute__((optimize( " O3 " ))) void conv (u32 lg, __restrict__ pu32 a, __restrict__ pu32 b) const {
209
+ void conv (u32 lg, __restrict__ pu32 a, __restrict__ pu32 b) const {
210
210
if (lg <= 4 ) {
211
211
u32 n = (1 << lg);
212
212
__restrict__ pu32 c = (pu32)_mm_malloc (n * 4 , 4 );
@@ -227,7 +227,7 @@ struct NTT_AVX512F {
227
227
ifft (lg, a, mt.r2 );
228
228
}
229
229
230
- __attribute__ ((optimize( " O3 " ))) vec<u32> conv (vec<u32> const &a, vec<u32> const &b) const {
230
+ vec<u32> conv (vec<u32> const &a, vec<u32> const &b) const {
231
231
u32 sz = std::max<u32>(0 , u32 (a.size () + b.size () - 1 ));
232
232
u32 lg = u32 (std::__lg (std::max<u32>(1 , sz - 1 )) + 1 );
233
233
pu32 ap = (pu32)_mm_malloc ((usz)std::max (64 , (1 << lg) * 4 ), 64 );
0 commit comments