diff --git a/src/mpn_extras.h b/src/mpn_extras.h index 78449a4d18..4c35125dd1 100644 --- a/src/mpn_extras.h +++ b/src/mpn_extras.h @@ -131,9 +131,9 @@ flint_mpn_get_d(mp_srcptr ptr, mp_size_t size, mp_size_t sign, long exp); #endif #if FLINT_HAVE_ADX -#define FLINT_MPN_MUL_FUNC_TAB_WIDTH 9 -#define FLINT_HAVE_MUL_FUNC(n, m) ((n) <= 8 || ((n) <= 16 && (m) <= 8)) -#define FLINT_HAVE_MUL_N_FUNC(n) ((n) <= 8) +#define FLINT_MPN_MUL_FUNC_TAB_WIDTH 17 /* inner dimension of flint_mpn_mul_func_tab: 17 columns, i.e. indices 0..16 */ +#define FLINT_HAVE_MUL_FUNC(n, m) ((n) <= 16) /* only n is tested, m is ignored; NOTE(review): presumably callers guarantee 1 <= m <= n, since the table holds NULL for m > n and for index 0 (confirm) */ +#define FLINT_HAVE_MUL_N_FUNC(n) ((n) <= 16) /* upper bound only; fixed-size n x n kernels are tabulated up to MUL(16,16) */ #define FLINT_HAVE_SQR_FUNC(n) ((n) <= 7) #else #define FLINT_MPN_MUL_FUNC_TAB_WIDTH 8 @@ -251,9 +251,11 @@ MPN_EXTRAS_INLINE mp_limb_t flint_mpn_mulhigh_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n) { FLINT_ASSERT(n >= 1); - FLINT_ASSERT(FLINT_HAVE_MULHIGH_N_FUNC(n)); - return flint_mpn_mulhigh_n_func_tab[n - 1](rp, xp, yp); + if (FLINT_HAVE_MULHIGH_N_FUNC(n)) /* a fixed-size kernel is tabulated for this n: dispatch through the table, which is indexed by n itself (entry 0 is a NULL placeholder) */ + return flint_mpn_mulhigh_n_func_tab[n](rp, xp, yp); + else /* no tabulated kernel for this n: call the generic routine, which takes n as an explicit argument */ + return flint_mpn_mulhigh_basecase(rp, xp, yp, n); } FLINT_FORCE_INLINE @@ -262,7 +264,7 @@ struct mp_limb_pair_t flint_mpn_mulhigh_normalised_n(mp_ptr rp, mp_srcptr xp, mp FLINT_ASSERT(n >= 1); FLINT_ASSERT(FLINT_HAVE_MULHIGH_N_FUNC(n)); - return flint_mpn_mulhigh_normalised_n_func_tab[n - 1](rp, xp, yp); + return flint_mpn_mulhigh_normalised_n_func_tab[n](rp, xp, yp); /* table indexed by n directly (entry 0 is a NULL placeholder); the assertions above are the only guard visible here, there is no fallback path */ } /* diff --git a/src/mpn_extras/mul_basecase.c b/src/mpn_extras/mul_basecase.c index d4c99a5c50..c038e9bbee 100644 --- a/src/mpn_extras/mul_basecase.c +++ b/src/mpn_extras/mul_basecase.c @@ -130,6 +130,335 @@ mp_limb_t flint_mpn_mul_16_6(mp_ptr, mp_srcptr, mp_srcptr); mp_limb_t flint_mpn_mul_16_7(mp_ptr, mp_srcptr, mp_srcptr); mp_limb_t flint_mpn_mul_16_8(mp_ptr, mp_srcptr, mp_srcptr); +mp_limb_t flint_mpn_mul_9_9(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_9_8(res, u, v); + res[17] = mpn_addmul_1(res + 8, u, 9, v[8]); + return res[17]; +} + +mp_limb_t flint_mpn_mul_10_9(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + 
flint_mpn_mul_10_8(res, u, v); + res[18] = mpn_addmul_1(res + 8, u, 10, v[8]); + return res[18]; +} + +mp_limb_t flint_mpn_mul_11_9(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_11_8(res, u, v); + res[19] = mpn_addmul_1(res + 8, u, 11, v[8]); + return res[19]; +} + +mp_limb_t flint_mpn_mul_12_9(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_12_8(res, u, v); + res[20] = mpn_addmul_1(res + 8, u, 12, v[8]); + return res[20]; +} + +mp_limb_t flint_mpn_mul_13_9(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_13_8(res, u, v); + res[21] = mpn_addmul_1(res + 8, u, 13, v[8]); + return res[21]; +} + +mp_limb_t flint_mpn_mul_14_9(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_14_8(res, u, v); + res[22] = mpn_addmul_1(res + 8, u, 14, v[8]); + return res[22]; +} + +mp_limb_t flint_mpn_mul_15_9(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_15_8(res, u, v); + res[23] = mpn_addmul_1(res + 8, u, 15, v[8]); + return res[23]; +} + +mp_limb_t flint_mpn_mul_16_9(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_16_8(res, u, v); + res[24] = mpn_addmul_1(res + 8, u, 16, v[8]); + return res[24]; +} + + +mp_limb_t flint_mpn_mul_10_10(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_10_8(res, u, v); + res[18] = mpn_addmul_1(res + 8, u, 10, v[8]); + res[19] = mpn_addmul_1(res + 9, u, 10, v[9]); + return res[19]; +} + +mp_limb_t flint_mpn_mul_11_10(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_11_8(res, u, v); + res[19] = mpn_addmul_1(res + 8, u, 11, v[8]); + res[20] = mpn_addmul_1(res + 9, u, 11, v[9]); + return res[20]; +} + +mp_limb_t flint_mpn_mul_12_10(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_12_8(res, u, v); + res[20] = mpn_addmul_1(res + 8, u, 12, v[8]); + res[21] = mpn_addmul_1(res + 9, u, 12, v[9]); + return res[21]; +} + +mp_limb_t flint_mpn_mul_13_10(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_13_8(res, u, v); + res[21] = mpn_addmul_1(res + 8, u, 13, v[8]); + res[22] = 
mpn_addmul_1(res + 9, u, 13, v[9]); + return res[22]; +} + +mp_limb_t flint_mpn_mul_14_10(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_14_8(res, u, v); + res[22] = mpn_addmul_1(res + 8, u, 14, v[8]); + res[23] = mpn_addmul_1(res + 9, u, 14, v[9]); + return res[23]; +} + +mp_limb_t flint_mpn_mul_15_10(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_15_8(res, u, v); + res[23] = mpn_addmul_1(res + 8, u, 15, v[8]); + res[24] = mpn_addmul_1(res + 9, u, 15, v[9]); + return res[24]; +} + +mp_limb_t flint_mpn_mul_16_10(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_16_8(res, u, v); + res[24] = mpn_addmul_1(res + 8, u, 16, v[8]); + res[25] = mpn_addmul_1(res + 9, u, 16, v[9]); + return res[25]; +} + + +mp_limb_t flint_mpn_mul_11_11(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_11_8(res, u, v); + res[19] = mpn_addmul_1(res + 8, u, 11, v[8]); + res[20] = mpn_addmul_1(res + 9, u, 11, v[9]); + res[21] = mpn_addmul_1(res + 10, u, 11, v[10]); + return res[21]; +} + +mp_limb_t flint_mpn_mul_12_11(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_12_8(res, u, v); + res[20] = mpn_addmul_1(res + 8, u, 12, v[8]); + res[21] = mpn_addmul_1(res + 9, u, 12, v[9]); + res[22] = mpn_addmul_1(res + 10, u, 12, v[10]); + return res[22]; +} + +mp_limb_t flint_mpn_mul_13_11(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_13_8(res, u, v); + res[21] = mpn_addmul_1(res + 8, u, 13, v[8]); + res[22] = mpn_addmul_1(res + 9, u, 13, v[9]); + res[23] = mpn_addmul_1(res + 10, u, 13, v[10]); + return res[23]; +} + +mp_limb_t flint_mpn_mul_14_11(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_14_8(res, u, v); + res[22] = mpn_addmul_1(res + 8, u, 14, v[8]); + res[23] = mpn_addmul_1(res + 9, u, 14, v[9]); + res[24] = mpn_addmul_1(res + 10, u, 14, v[10]); + return res[24]; +} + +mp_limb_t flint_mpn_mul_15_11(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_15_8(res, u, v); + res[23] = mpn_addmul_1(res + 8, u, 15, v[8]); + res[24] = 
mpn_addmul_1(res + 9, u, 15, v[9]); + res[25] = mpn_addmul_1(res + 10, u, 15, v[10]); + return res[25]; +} + +mp_limb_t flint_mpn_mul_16_11(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_16_8(res, u, v); + res[24] = mpn_addmul_1(res + 8, u, 16, v[8]); + res[25] = mpn_addmul_1(res + 9, u, 16, v[9]); + res[26] = mpn_addmul_1(res + 10, u, 16, v[10]); + return res[26]; +} + + +mp_limb_t flint_mpn_mul_12_12(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_12_8(res, u, v); + res[20] = mpn_addmul_1(res + 8, u, 12, v[8]); + res[21] = mpn_addmul_1(res + 9, u, 12, v[9]); + res[22] = mpn_addmul_1(res + 10, u, 12, v[10]); + res[23] = mpn_addmul_1(res + 11, u, 12, v[11]); + return res[23]; +} + +mp_limb_t flint_mpn_mul_13_12(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_13_8(res, u, v); + res[21] = mpn_addmul_1(res + 8, u, 13, v[8]); + res[22] = mpn_addmul_1(res + 9, u, 13, v[9]); + res[23] = mpn_addmul_1(res + 10, u, 13, v[10]); + res[24] = mpn_addmul_1(res + 11, u, 13, v[11]); + return res[24]; +} + +mp_limb_t flint_mpn_mul_14_12(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_14_8(res, u, v); + res[22] = mpn_addmul_1(res + 8, u, 14, v[8]); + res[23] = mpn_addmul_1(res + 9, u, 14, v[9]); + res[24] = mpn_addmul_1(res + 10, u, 14, v[10]); + res[25] = mpn_addmul_1(res + 11, u, 14, v[11]); + return res[25]; +} + +mp_limb_t flint_mpn_mul_15_12(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_15_8(res, u, v); + res[23] = mpn_addmul_1(res + 8, u, 15, v[8]); + res[24] = mpn_addmul_1(res + 9, u, 15, v[9]); + res[25] = mpn_addmul_1(res + 10, u, 15, v[10]); + res[26] = mpn_addmul_1(res + 11, u, 15, v[11]); + return res[26]; +} + +mp_limb_t flint_mpn_mul_16_12(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_16_8(res, u, v); + res[24] = mpn_addmul_1(res + 8, u, 16, v[8]); + res[25] = mpn_addmul_1(res + 9, u, 16, v[9]); + res[26] = mpn_addmul_1(res + 10, u, 16, v[10]); + res[27] = mpn_addmul_1(res + 11, u, 16, v[11]); + return res[27]; 
+} + +mp_limb_t flint_mpn_mul_13_13(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_13_8(res, u, v); + res[21] = mpn_addmul_1(res + 8, u, 13, v[8]); + res[22] = mpn_addmul_1(res + 9, u, 13, v[9]); + res[23] = mpn_addmul_1(res + 10, u, 13, v[10]); + res[24] = mpn_addmul_1(res + 11, u, 13, v[11]); + res[25] = mpn_addmul_1(res + 12, u, 13, v[12]); + return res[25]; +} + +mp_limb_t flint_mpn_mul_14_13(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_14_8(res, u, v); + res[22] = mpn_addmul_1(res + 8, u, 14, v[8]); + res[23] = mpn_addmul_1(res + 9, u, 14, v[9]); + res[24] = mpn_addmul_1(res + 10, u, 14, v[10]); + res[25] = mpn_addmul_1(res + 11, u, 14, v[11]); + res[26] = mpn_addmul_1(res + 12, u, 14, v[12]); + return res[26]; +} + +mp_limb_t flint_mpn_mul_15_13(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_15_8(res, u, v); + res[23] = mpn_addmul_1(res + 8, u, 15, v[8]); + res[24] = mpn_addmul_1(res + 9, u, 15, v[9]); + res[25] = mpn_addmul_1(res + 10, u, 15, v[10]); + res[26] = mpn_addmul_1(res + 11, u, 15, v[11]); + res[27] = mpn_addmul_1(res + 12, u, 15, v[12]); + return res[27]; +} + +mp_limb_t flint_mpn_mul_16_13(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_16_8(res, u, v); + res[24] = mpn_addmul_1(res + 8, u, 16, v[8]); + res[25] = mpn_addmul_1(res + 9, u, 16, v[9]); + res[26] = mpn_addmul_1(res + 10, u, 16, v[10]); + res[27] = mpn_addmul_1(res + 11, u, 16, v[11]); + res[28] = mpn_addmul_1(res + 12, u, 16, v[12]); + return res[28]; +} + +mp_limb_t flint_mpn_mul_14_14(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_14_8(res, u, v); + res[22] = mpn_addmul_1(res + 8, u, 14, v[8]); + res[23] = mpn_addmul_1(res + 9, u, 14, v[9]); + res[24] = mpn_addmul_1(res + 10, u, 14, v[10]); + res[25] = mpn_addmul_1(res + 11, u, 14, v[11]); + res[26] = mpn_addmul_1(res + 12, u, 14, v[12]); + res[27] = mpn_addmul_1(res + 13, u, 14, v[13]); + return res[27]; +} + +mp_limb_t flint_mpn_mul_15_14(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + 
flint_mpn_mul_15_8(res, u, v); + res[23] = mpn_addmul_1(res + 8, u, 15, v[8]); + res[24] = mpn_addmul_1(res + 9, u, 15, v[9]); + res[25] = mpn_addmul_1(res + 10, u, 15, v[10]); + res[26] = mpn_addmul_1(res + 11, u, 15, v[11]); + res[27] = mpn_addmul_1(res + 12, u, 15, v[12]); + res[28] = mpn_addmul_1(res + 13, u, 15, v[13]); + return res[28]; +} + +mp_limb_t flint_mpn_mul_16_14(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + flint_mpn_mul_16_8(res, u, v); + res[24] = mpn_addmul_1(res + 8, u, 16, v[8]); + res[25] = mpn_addmul_1(res + 9, u, 16, v[9]); + res[26] = mpn_addmul_1(res + 10, u, 16, v[10]); + res[27] = mpn_addmul_1(res + 11, u, 16, v[11]); + res[28] = mpn_addmul_1(res + 12, u, 16, v[12]); + res[29] = mpn_addmul_1(res + 13, u, 16, v[13]); + return res[29]; +} + +mp_limb_t flint_mpn_mul_15_15(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + mp_limb_t tmp[22], cy; + flint_mpn_mul_15_8(res, u, v); + flint_mpn_mul_15_7(tmp, u, v + 8); + cy = mpn_add_n(res + 8, res + 8, tmp, 15); + mpn_add_1(res + 23, tmp + 15, 7, cy); + return res[29]; +} + +mp_limb_t flint_mpn_mul_16_15(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + mp_limb_t tmp[23], cy; + flint_mpn_mul_16_8(res, u, v); + flint_mpn_mul_16_7(tmp, u, v + 8); + cy = mpn_add_n(res + 8, res + 8, tmp, 16); + mpn_add_1(res + 24, tmp + 16, 7, cy); + return res[30]; +} + +mp_limb_t flint_mpn_mul_16_16(mp_ptr res, mp_srcptr u, mp_srcptr v) +{ + mp_limb_t tmp[24], cy; + flint_mpn_mul_16_8(res, u, v); + flint_mpn_mul_16_8(tmp, u, v + 8); + cy = mpn_add_n(res + 8, res + 8, tmp, 16); + mpn_add_1(res + 24, tmp + 16, 8, cy); + return res[31]; +} + #else mp_limb_t flint_mpn_mul_1_1(mp_ptr res, mp_srcptr u, mp_srcptr v) @@ -645,23 +974,23 @@ mp_limb_t flint_mpn_mul_14_1(mp_ptr res, mp_srcptr u, mp_srcptr v) #if FLINT_HAVE_ADX const flint_mpn_mul_func_t flint_mpn_mul_func_tab[][FLINT_MPN_MUL_FUNC_TAB_WIDTH] = { - {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, - {NULL, MUL( 1,1), NULL, NULL, NULL, NULL, NULL, NULL, NULL}, - {NULL, MUL( 
2,1), MUL( 2,2), NULL, NULL, NULL, NULL, NULL, NULL}, - {NULL, MUL( 3,1), MUL( 3,2), MUL( 3,3), NULL, NULL, NULL, NULL, NULL}, - {NULL, MUL( 4,1), MUL( 4,2), MUL( 4,3), MUL( 4,4), NULL, NULL, NULL, NULL}, - {NULL, MUL( 5,1), MUL( 5,2), MUL( 5,3), MUL( 5,4), MUL( 5,5), NULL, NULL, NULL}, - {NULL, MUL( 6,1), MUL( 6,2), MUL( 6,3), MUL( 6,4), MUL( 6,5), MUL( 6,6), NULL, NULL}, - {NULL, MUL( 7,1), MUL( 7,2), MUL( 7,3), MUL( 7,4), MUL( 7,5), MUL( 7,6), MUL( 7,7), NULL}, - {NULL, MUL( 8,1), MUL( 8,2), MUL( 8,3), MUL( 8,4), MUL( 8,5), MUL( 8,6), MUL( 8,7), MUL( 8,8)}, - {NULL, MUL( 9,1), MUL( 9,2), MUL( 9,3), MUL( 9,4), MUL( 9,5), MUL( 9,6), MUL( 9,7), MUL( 9,8)}, - {NULL, MUL(10,1), MUL(10,2), MUL(10,3), MUL(10,4), MUL(10,5), MUL(10,6), MUL(10,7), MUL(10,8)}, - {NULL, MUL(11,1), MUL(11,2), MUL(11,3), MUL(11,4), MUL(11,5), MUL(11,6), MUL(11,7), MUL(11,8)}, - {NULL, MUL(12,1), MUL(12,2), MUL(12,3), MUL(12,4), MUL(12,5), MUL(12,6), MUL(12,7), MUL(12,8)}, - {NULL, MUL(13,1), MUL(13,2), MUL(13,3), MUL(13,4), MUL(13,5), MUL(13,6), MUL(13,7), MUL(13,8)}, - {NULL, MUL(14,1), MUL(14,2), MUL(14,3), MUL(14,4), MUL(14,5), MUL(14,6), MUL(14,7), MUL(14,8)}, - {NULL, MUL(15,1), MUL(15,2), MUL(15,3), MUL(15,4), MUL(15,5), MUL(15,6), MUL(15,7), MUL(15,8)}, - {NULL, MUL(16,1), MUL(16,2), MUL(16,3), MUL(16,4), MUL(16,5), MUL(16,6), MUL(16,7), MUL(16,8)}, + {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL( 1,1), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL( 2,1), MUL( 2,2), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL( 3,1), MUL( 3,2), MUL( 3,3), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL( 4,1), MUL( 4,2), MUL( 4,3), MUL( 4,4), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL( 5,1), MUL( 5,2), MUL( 5,3), MUL( 5,4), MUL( 5,5), 
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL( 6,1), MUL( 6,2), MUL( 6,3), MUL( 6,4), MUL( 6,5), MUL( 6,6), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL( 7,1), MUL( 7,2), MUL( 7,3), MUL( 7,4), MUL( 7,5), MUL( 7,6), MUL( 7,7), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL( 8,1), MUL( 8,2), MUL( 8,3), MUL( 8,4), MUL( 8,5), MUL( 8,6), MUL( 8,7), MUL( 8,8), NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL( 9,1), MUL( 9,2), MUL( 9,3), MUL( 9,4), MUL( 9,5), MUL( 9,6), MUL( 9,7), MUL( 9,8), MUL( 9,9), NULL, NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL(10,1), MUL(10,2), MUL(10,3), MUL(10,4), MUL(10,5), MUL(10,6), MUL(10,7), MUL(10,8), MUL(10,9), MUL(10,10), NULL, NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL(11,1), MUL(11,2), MUL(11,3), MUL(11,4), MUL(11,5), MUL(11,6), MUL(11,7), MUL(11,8), MUL(11,9), MUL(11,10), MUL(11,11), NULL, NULL, NULL, NULL, NULL}, + {NULL, MUL(12,1), MUL(12,2), MUL(12,3), MUL(12,4), MUL(12,5), MUL(12,6), MUL(12,7), MUL(12,8), MUL(12,9), MUL(12,10), MUL(12,11), MUL(12,12), NULL, NULL, NULL, NULL}, + {NULL, MUL(13,1), MUL(13,2), MUL(13,3), MUL(13,4), MUL(13,5), MUL(13,6), MUL(13,7), MUL(13,8), MUL(13,9), MUL(13,10), MUL(13,11), MUL(13,12), MUL(13,13), NULL, NULL, NULL}, + {NULL, MUL(14,1), MUL(14,2), MUL(14,3), MUL(14,4), MUL(14,5), MUL(14,6), MUL(14,7), MUL(14,8), MUL(14,9), MUL(14,10), MUL(14,11), MUL(14,12), MUL(14,13), MUL(14,14), NULL, NULL}, + {NULL, MUL(15,1), MUL(15,2), MUL(15,3), MUL(15,4), MUL(15,5), MUL(15,6), MUL(15,7), MUL(15,8), MUL(15,9), MUL(15,10), MUL(15,11), MUL(15,12), MUL(15,13), MUL(15,14), MUL(15,15), NULL}, + {NULL, MUL(16,1), MUL(16,2), MUL(16,3), MUL(16,4), MUL(16,5), MUL(16,6), MUL(16,7), MUL(16,8), MUL(16,9), MUL(16,10), MUL(16,11), MUL(16,12), MUL(16,13), MUL(16,14), MUL(16,15), MUL(16,16)}, }; const flint_mpn_mul_func_t flint_mpn_mul_n_func_tab[] = { @@ -674,6 +1003,14 @@ const flint_mpn_mul_func_t 
flint_mpn_mul_n_func_tab[] = { MUL( 6, 6), MUL( 7, 7), MUL( 8, 8), + MUL( 9, 9), + MUL( 10, 10), + MUL( 11, 11), + MUL( 12, 12), + MUL( 13, 13), + MUL( 14, 14), + MUL( 15, 15), + MUL( 16, 16), }; #else diff --git a/src/mpn_extras/mulhigh.c b/src/mpn_extras/mulhigh.c index 866f882703..e3cf2b3773 100644 --- a/src/mpn_extras/mulhigh.c +++ b/src/mpn_extras/mulhigh.c @@ -40,6 +40,7 @@ struct mp_limb_pair_t flint_mpn_mulhigh_normalised_12(mp_ptr, mp_srcptr, mp_srcp const flint_mpn_mul_func_t flint_mpn_mulhigh_n_func_tab[] = { + NULL, flint_mpn_mulhigh_1, flint_mpn_mulhigh_2, flint_mpn_mulhigh_3, @@ -56,6 +57,7 @@ const flint_mpn_mul_func_t flint_mpn_mulhigh_n_func_tab[] = const flint_mpn_mulhigh_normalised_func_t flint_mpn_mulhigh_normalised_n_func_tab[] = { + NULL, flint_mpn_mulhigh_normalised_1, flint_mpn_mulhigh_normalised_2, flint_mpn_mulhigh_normalised_3, diff --git a/src/mpn_extras/profile/p-mul_n.c b/src/mpn_extras/profile/p-mul_n.c index 29a6c84c05..cb8e78da61 100644 --- a/src/mpn_extras/profile/p-mul_n.c +++ b/src/mpn_extras/profile/p-mul_n.c @@ -13,7 +13,7 @@ #include "mpn_extras.h" #include "profiler.h" -#define MAXN 15 +#define MAXN 16 int main(void) { diff --git a/src/mpn_extras/profile/p-mulhigh_basecase.c b/src/mpn_extras/profile/p-mulhigh_basecase.c index a2ce0325db..09b287fd93 100644 --- a/src/mpn_extras/profile/p-mulhigh_basecase.c +++ b/src/mpn_extras/profile/p-mulhigh_basecase.c @@ -22,7 +22,7 @@ void mpfr_mulhigh_n(mp_ptr, mp_srcptr, mp_srcptr, mp_size_t); int main(void) { -#define N_MIN 6 +#define N_MIN 1 #define N_MAX 64 mp_limb_t rf[N_MAX]; @@ -43,7 +43,7 @@ int main(void) mpn_random2(yp, n); TIMEIT_START - flint_mpn_mulhigh_basecase(rf, xp, yp, n); + flint_mpn_mulhigh_n(rf, xp, yp, n); TIMEIT_STOP_VALUES(__, t1) TIMEIT_START