x86: update AS_* macros to binutils >=2.23, supporting ADX and AVX2
Now that the kernel specifies binutils 2.23 as the minimum version, we
can remove ifdefs for AVX2 and ADX throughout.

Signed-off-by: Jason A. Donenfeld <[email protected]>
Acked-by: Ingo Molnar <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Signed-off-by: Masahiro Yamada <[email protected]>
zx2c4 authored and masahir0y committed Apr 8, 2020
1 parent d7e40ea commit e6abef6
Showing 21 changed files with 15 additions and 90 deletions.
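The change repeated across these files removes a compile-time assembler-capability guard from code that also performs a runtime CPU check. A minimal userspace sketch of the before/after shape — not kernel code; the function names are hypothetical stand-ins:

#include <stdio.h>

#define CONFIG_AS_AVX2 1	/* Kconfig now sets this unconditionally on x86 */

static int cpu_has_avx2(void) { return 1; }	/* stand-in for boot_cpu_has() */
static void xor_avx2(void)  { puts("AVX2 path"); }
static void xor_ssse3(void) { puts("SSSE3 fallback"); }

int main(void)
{
#ifdef CONFIG_AS_AVX2	/* the guard this commit deletes */
	if (cpu_has_avx2()) {
		xor_avx2();
		return 0;
	}
#endif
	xor_ssse3();
	return 0;
}

Since CONFIG_AS_AVX2 is now always defined, the #ifdef/#endif pair is dead weight and only the runtime check survives.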
10 changes: 0 additions & 10 deletions arch/x86/Kconfig.assembler
@@ -1,11 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Copyright (C) 2020 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
 
-config AS_AVX2
-	def_bool $(as-instr,vpbroadcastb %xmm0$(comma)%ymm1)
-	help
-	  Supported by binutils >= 2.22 and LLVM integrated assembler
-
 config AS_AVX512
 	def_bool $(as-instr,vpmovm2b %k1$(comma)%zmm5)
 	help
@@ -20,8 +15,3 @@ config AS_SHA256_NI
 	def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1)
 	help
 	  Supported by binutils >= 2.24 and LLVM integrated assembler
-
-config AS_ADX
-	def_bool $(as-instr,adox %eax$(comma)%eax)
-	help
-	  Supported by binutils >= 2.23 and LLVM integrated assembler
6 changes: 2 additions & 4 deletions arch/x86/crypto/Makefile
@@ -47,17 +47,15 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
 aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
 
 obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o
-chacha-x86_64-y := chacha-ssse3-x86_64.o chacha_glue.o
-chacha-x86_64-$(CONFIG_AS_AVX2) += chacha-avx2-x86_64.o
+chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha_glue.o
 chacha-x86_64-$(CONFIG_AS_AVX512) += chacha-avx512vl-x86_64.o
 
 obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o
 aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o
 aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o aes_ctrby8_avx-x86_64.o
 
 obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
-sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
-sha1-ssse3-$(CONFIG_AS_AVX2) += sha1_avx2_x86_64_asm.o
+sha1-ssse3-y := sha1_avx2_x86_64_asm.o sha1_ssse3_asm.o sha1_ssse3_glue.o
 sha1-ssse3-$(CONFIG_AS_SHA1_NI) += sha1_ni_asm.o
 
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
3 changes: 0 additions & 3 deletions arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -1868,7 +1868,6 @@ key_256_finalize:
 	ret
 SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
 
-#ifdef CONFIG_AS_AVX2
 ###############################################################################
 # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
 # Input: A and B (128-bits each, bit-reflected)
@@ -2836,5 +2835,3 @@ key_256_finalize4:
 	FUNC_RESTORE
 	ret
 SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)
-
-#endif /* CONFIG_AS_AVX2 */
7 changes: 0 additions & 7 deletions arch/x86/crypto/aesni-intel_glue.c
@@ -233,7 +233,6 @@ static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
 	.finalize = &aesni_gcm_finalize_avx_gen2,
 };
 
-#ifdef CONFIG_AS_AVX2
 /*
  * asmlinkage void aesni_gcm_init_avx_gen4()
  * gcm_data *my_ctx_data, context data
@@ -276,8 +275,6 @@ static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
 	.finalize = &aesni_gcm_finalize_avx_gen4,
 };
 
-#endif
-
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
 {
@@ -706,10 +703,8 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 	if (!enc)
 		left -= auth_tag_len;
 
-#ifdef CONFIG_AS_AVX2
 	if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4)
 		gcm_tfm = &aesni_gcm_tfm_avx_gen2;
-#endif
 	if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2)
 		gcm_tfm = &aesni_gcm_tfm_sse;

@@ -1069,12 +1064,10 @@ static int __init aesni_init(void)
 	if (!x86_match_cpu(aesni_cpu_id))
 		return -ENODEV;
 #ifdef CONFIG_X86_64
-#ifdef CONFIG_AS_AVX2
 	if (boot_cpu_has(X86_FEATURE_AVX2)) {
 		pr_info("AVX2 version of gcm_enc/dec engaged.\n");
 		aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4;
 	} else
-#endif
 	if (boot_cpu_has(X86_FEATURE_AVX)) {
 		pr_info("AVX version of gcm_enc/dec engaged.\n");
 		aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2;
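With the preprocessor guard gone, aesni_init() picks a GCM implementation purely at runtime. A self-contained sketch of that cascade — the struct layout and names below are illustrative, not the kernel's API:

#include <stdio.h>

struct gcm_tfm { const char *name; };

static const struct gcm_tfm tfm_sse      = { "SSE" };
static const struct gcm_tfm tfm_avx_gen2 = { "AVX" };
static const struct gcm_tfm tfm_avx_gen4 = { "AVX2" };

static int has_avx2 = 1, has_avx = 1;	/* pretend CPUID results */

int main(void)
{
	const struct gcm_tfm *gcm = &tfm_sse;

	if (has_avx2)		/* no longer wrapped in #ifdef CONFIG_AS_AVX2 */
		gcm = &tfm_avx_gen4;
	else if (has_avx)
		gcm = &tfm_avx_gen2;

	printf("%s version of gcm_enc/dec engaged.\n", gcm->name);
	return 0;
}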
6 changes: 2 additions & 4 deletions arch/x86/crypto/chacha_glue.c
@@ -79,8 +79,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 		}
 	}
 
-	if (IS_ENABLED(CONFIG_AS_AVX2) &&
-	    static_branch_likely(&chacha_use_avx2)) {
+	if (static_branch_likely(&chacha_use_avx2)) {
 		while (bytes >= CHACHA_BLOCK_SIZE * 8) {
 			chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
 			bytes -= CHACHA_BLOCK_SIZE * 8;
@@ -288,8 +287,7 @@ static int __init chacha_simd_mod_init(void)

 	static_branch_enable(&chacha_use_simd);
 
-	if (IS_ENABLED(CONFIG_AS_AVX2) &&
-	    boot_cpu_has(X86_FEATURE_AVX) &&
+	if (boot_cpu_has(X86_FEATURE_AVX) &&
 	    boot_cpu_has(X86_FEATURE_AVX2) &&
 	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
 		static_branch_enable(&chacha_use_avx2);
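Dropping the IS_ENABLED(CONFIG_AS_AVX2) conjunct is safe because IS_ENABLED() folds to a compile-time 0 or 1: when the option could be unset, the whole branch was dead code the compiler discarded; now that it is always set, the test is vacuous. A simplified stand-in — the real macro in include/linux/kconfig.h is more elaborate:

#include <stdio.h>

#define CONFIG_AS_AVX2 1
#define IS_ENABLED(opt) (opt)	/* crude stand-in for the kernel macro */

static int chacha_use_avx2 = 1;	/* stand-in for the static key */

int main(void)
{
	/* Old form: compile-time gate && runtime gate. */
	if (IS_ENABLED(CONFIG_AS_AVX2) && chacha_use_avx2)
		puts("AVX2 blocks (old form)");

	/* New form: assembler support is guaranteed, so only the
	   runtime CPU-feature branch remains. */
	if (chacha_use_avx2)
		puts("AVX2 blocks (new form)");
	return 0;
}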
8 changes: 0 additions & 8 deletions arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -1514,10 +1514,6 @@ sub poly1305_iteration {

 if ($avx>1) {
 
-if ($kernel) {
-	$code .= "#ifdef CONFIG_AS_AVX2\n";
-}
-
 my ($H0,$H1,$H2,$H3,$H4, $MASK, $T4,$T0,$T1,$T2,$T3, $D0,$D1,$D2,$D3,$D4) =
     map("%ymm$_",(0..15));
 my $S4=$MASK;
@@ -2808,10 +2804,6 @@ sub poly1305_blocks_avxN {
 poly1305_blocks_avxN(0);
 &end_function("poly1305_blocks_avx2");
 
-if($kernel) {
-	$code .= "#endif\n";
-}
-
 #######################################################################
 if ($avx>2) {
 # On entry we have input length divisible by 64. But since inner loop
5 changes: 2 additions & 3 deletions arch/x86/crypto/poly1305_glue.c
@@ -108,7 +108,7 @@ static void poly1305_simd_blocks(void *ctx, const u8 *inp, size_t len,
 	kernel_fpu_begin();
 	if (IS_ENABLED(CONFIG_AS_AVX512) && static_branch_likely(&poly1305_use_avx512))
 		poly1305_blocks_avx512(ctx, inp, bytes, padbit);
-	else if (IS_ENABLED(CONFIG_AS_AVX2) && static_branch_likely(&poly1305_use_avx2))
+	else if (static_branch_likely(&poly1305_use_avx2))
 		poly1305_blocks_avx2(ctx, inp, bytes, padbit);
 	else
 		poly1305_blocks_avx(ctx, inp, bytes, padbit);
@@ -264,8 +263,7 @@ static int __init poly1305_simd_mod_init(void)
 	if (boot_cpu_has(X86_FEATURE_AVX) &&
 	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
 		static_branch_enable(&poly1305_use_avx);
-	if (IS_ENABLED(CONFIG_AS_AVX2) && boot_cpu_has(X86_FEATURE_AVX) &&
-	    boot_cpu_has(X86_FEATURE_AVX2) &&
+	if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) &&
 	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
 		static_branch_enable(&poly1305_use_avx2);
 	if (IS_ENABLED(CONFIG_AS_AVX512) && boot_cpu_has(X86_FEATURE_AVX) &&
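Note the asymmetry this file preserves: the AVX-512 path stays behind IS_ENABLED(CONFIG_AS_AVX512), since that instruction set still requires a newer assembler than the new binutils 2.23 floor, while the AVX2 gate disappears entirely. A minimal sketch of the resulting dispatch, with stand-in macros and flags rather than kernel code:

#include <stdio.h>

#define CONFIG_AS_AVX512 0	/* still optional: may exceed the binutils floor */
#define IS_ENABLED(opt) (opt)	/* crude stand-in for the kernel macro */

static int use_avx512 = 1, use_avx2 = 1;	/* stand-ins for static keys */

int main(void)
{
	if (IS_ENABLED(CONFIG_AS_AVX512) && use_avx512)	/* folds away when 0 */
		puts("poly1305_blocks_avx512");
	else if (use_avx2)	/* the IS_ENABLED(CONFIG_AS_AVX2) gate is gone */
		puts("poly1305_blocks_avx2");
	else
		puts("poly1305_blocks_avx");
	return 0;
}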
6 changes: 0 additions & 6 deletions arch/x86/crypto/sha1_ssse3_glue.c
@@ -174,7 +174,6 @@ static void unregister_sha1_avx(void)
 	crypto_unregister_shash(&sha1_avx_alg);
 }
 
-#if defined(CONFIG_AS_AVX2)
 #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */
 
 asmlinkage void sha1_transform_avx2(struct sha1_state *state,
@@ -246,11 +245,6 @@ static void unregister_sha1_avx2(void)
 	crypto_unregister_shash(&sha1_avx2_alg);
 }
 
-#else
-static inline int register_sha1_avx2(void) { return 0; }
-static inline void unregister_sha1_avx2(void) { }
-#endif
-
 #ifdef CONFIG_AS_SHA1_NI
 asmlinkage void sha1_ni_transform(struct sha1_state *digest, const u8 *data,
 				  int rounds);
3 changes: 0 additions & 3 deletions arch/x86/crypto/sha256-avx2-asm.S
@@ -48,7 +48,6 @@
 # This code schedules 2 blocks at a time, with 4 lanes per block
 ########################################################################
 
-#ifdef CONFIG_AS_AVX2
 #include <linux/linkage.h>
 
 ## assume buffers not aligned
@@ -767,5 +766,3 @@ _SHUF_00BA:
 .align 32
 _SHUF_DC00:
 	.octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF
-
-#endif
6 changes: 0 additions & 6 deletions arch/x86/crypto/sha256_ssse3_glue.c
@@ -220,7 +220,6 @@ static void unregister_sha256_avx(void)
 			ARRAY_SIZE(sha256_avx_algs));
 }
 
-#if defined(CONFIG_AS_AVX2)
 asmlinkage void sha256_transform_rorx(struct sha256_state *state,
 				      const u8 *data, int blocks);

@@ -295,11 +294,6 @@ static void unregister_sha256_avx2(void)
 			ARRAY_SIZE(sha256_avx2_algs));
 }
 
-#else
-static inline int register_sha256_avx2(void) { return 0; }
-static inline void unregister_sha256_avx2(void) { }
-#endif
-
 #ifdef CONFIG_AS_SHA256_NI
 asmlinkage void sha256_ni_transform(struct sha256_state *digest,
 				    const u8 *data, int rounds);
3 changes: 0 additions & 3 deletions arch/x86/crypto/sha512-avx2-asm.S
@@ -49,7 +49,6 @@
 # This code schedules 1 blocks at a time, with 4 lanes per block
 ########################################################################
 
-#ifdef CONFIG_AS_AVX2
 #include <linux/linkage.h>
 
 .text
@@ -749,5 +748,3 @@ PSHUFFLE_BYTE_FLIP_MASK:
 MASK_YMM_LO:
 	.octa 0x00000000000000000000000000000000
 	.octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
-
-#endif
5 changes: 0 additions & 5 deletions arch/x86/crypto/sha512_ssse3_glue.c
@@ -218,7 +218,6 @@ static void unregister_sha512_avx(void)
 			ARRAY_SIZE(sha512_avx_algs));
 }
 
-#if defined(CONFIG_AS_AVX2)
 asmlinkage void sha512_transform_rorx(struct sha512_state *state,
 				      const u8 *data, int blocks);

@@ -293,10 +292,6 @@ static void unregister_sha512_avx2(void)
 	crypto_unregister_shashes(sha512_avx2_algs,
 				  ARRAY_SIZE(sha512_avx2_algs));
 }
-#else
-static inline int register_sha512_avx2(void) { return 0; }
-static inline void unregister_sha512_avx2(void) { }
-#endif
 
 static int __init sha512_ssse3_mod_init(void)
 {
8 changes: 4 additions & 4 deletions crypto/Kconfig
@@ -267,7 +267,7 @@ config CRYPTO_CURVE25519

 config CRYPTO_CURVE25519_X86
 	tristate "x86_64 accelerated Curve25519 scalar multiplication library"
-	depends on X86 && 64BIT && AS_ADX
+	depends on X86 && 64BIT
 	select CRYPTO_LIB_CURVE25519_GENERIC
 	select CRYPTO_ARCH_HAVE_LIB_CURVE25519

@@ -465,7 +465,7 @@ config CRYPTO_NHPOLY1305_SSE2

 config CRYPTO_NHPOLY1305_AVX2
 	tristate "NHPoly1305 hash function (x86_64 AVX2 implementation)"
-	depends on X86 && 64BIT && AS_AVX2
+	depends on X86 && 64BIT
 	select CRYPTO_NHPOLY1305
 	help
 	  AVX2 optimized implementation of the hash function used by the
@@ -1303,7 +1303,7 @@ config CRYPTO_CAMELLIA_AESNI_AVX_X86_64

 config CRYPTO_CAMELLIA_AESNI_AVX2_X86_64
 	tristate "Camellia cipher algorithm (x86_64/AES-NI/AVX2)"
-	depends on X86 && 64BIT && AS_AVX2
+	depends on X86 && 64BIT
 	depends on CRYPTO
 	select CRYPTO_CAMELLIA_AESNI_AVX_X86_64
 	help
@@ -1573,7 +1573,7 @@ config CRYPTO_SERPENT_AVX_X86_64

 config CRYPTO_SERPENT_AVX2_X86_64
 	tristate "Serpent cipher algorithm (x86_64/AVX2)"
-	depends on X86 && 64BIT && AS_AVX2
+	depends on X86 && 64BIT
 	select CRYPTO_SERPENT_AVX_X86_64
 	help
 	  Serpent cipher algorithm, by Anderson, Biham & Knudsen.
6 changes: 0 additions & 6 deletions lib/raid6/algos.c
@@ -34,10 +34,8 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_avx512x2,
 	&raid6_avx512x1,
 #endif
-#ifdef CONFIG_AS_AVX2
 	&raid6_avx2x2,
 	&raid6_avx2x1,
-#endif
 	&raid6_sse2x2,
 	&raid6_sse2x1,
 	&raid6_sse1x2,
@@ -51,11 +49,9 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_avx512x2,
 	&raid6_avx512x1,
 #endif
-#ifdef CONFIG_AS_AVX2
 	&raid6_avx2x4,
 	&raid6_avx2x2,
 	&raid6_avx2x1,
-#endif
 	&raid6_sse2x4,
 	&raid6_sse2x2,
 	&raid6_sse2x1,
@@ -101,9 +97,7 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #ifdef CONFIG_AS_AVX512
 	&raid6_recov_avx512,
 #endif
-#ifdef CONFIG_AS_AVX2
 	&raid6_recov_avx2,
-#endif
 	&raid6_recov_ssse3,
 #endif
 #ifdef CONFIG_S390
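raid6 keeps its candidate implementations in NULL-terminated tables and chooses among them at runtime — each entry's valid() callback screens out CPUs lacking the feature — so the AVX2 entries can be compiled in unconditionally. A compilable sketch of that table-plus-probe shape, with hypothetical names:

#include <stdio.h>

struct raid6_calls {
	const char *name;
	int (*valid)(void);	/* runtime CPU check; NULL means always usable */
};

static int avx2_valid(void) { return 0; }	/* pretend this CPU lacks AVX2 */
static int sse2_valid(void) { return 1; }

static const struct raid6_calls raid6_avx2x2 = { "avx2x2", avx2_valid };
static const struct raid6_calls raid6_sse2x2 = { "sse2x2", sse2_valid };

/* Entries now appear unconditionally; #ifdef CONFIG_AS_AVX2 used to hide
   the AVX2 ones from this table. */
static const struct raid6_calls *const raid6_algos[] = {
	&raid6_avx2x2,
	&raid6_sse2x2,
	NULL,
};

int main(void)
{
	for (const struct raid6_calls *const *a = raid6_algos; *a; a++)
		if (!(*a)->valid || (*a)->valid())
			printf("usable: %s\n", (*a)->name);
	return 0;
}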
4 changes: 0 additions & 4 deletions lib/raid6/avx2.c
@@ -13,8 +13,6 @@
  *
  */
 
-#ifdef CONFIG_AS_AVX2
-
 #include <linux/raid/pq.h>
 #include "x86.h"

@@ -470,5 +468,3 @@ const struct raid6_calls raid6_avx2x4 = {
 	1			/* Has cache hints */
 };
 #endif
-
-#endif /* CONFIG_AS_AVX2 */
6 changes: 0 additions & 6 deletions lib/raid6/recov_avx2.c
@@ -4,8 +4,6 @@
  * Author: Jim Kukunas <[email protected]>
  */
 
-#ifdef CONFIG_AS_AVX2
-
 #include <linux/raid/pq.h>
 #include "x86.h"

@@ -313,7 +311,3 @@ const struct raid6_recov_calls raid6_recov_avx2 = {
 #endif
 	.priority = 2,
 };
-
-#else
-#warning "your version of binutils lacks AVX2 support"
-#endif
3 changes: 0 additions & 3 deletions lib/raid6/test/Makefile
@@ -35,9 +35,6 @@ endif
 ifeq ($(IS_X86),yes)
         OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
         CFLAGS += -DCONFIG_X86
-        CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
-                    gcc -c -x assembler - >/dev/null 2>&1 && \
-                    rm ./-.o && echo -DCONFIG_AS_AVX2=1)
         CFLAGS += $(shell echo "vpmovm2b %k1, %zmm5" | \
                     gcc -c -x assembler - >/dev/null 2>&1 && \
                     rm ./-.o && echo -DCONFIG_AS_AVX512=1)
2 changes: 1 addition & 1 deletion net/netfilter/Makefile
@@ -83,7 +83,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
 		  nft_set_pipapo.o
 
 ifdef CONFIG_X86_64
-ifdef CONFIG_AS_AVX2
+ifndef CONFIG_UML
 nf_tables-objs += nft_set_pipapo_avx2.o
 endif
 endif
2 changes: 1 addition & 1 deletion net/netfilter/nf_tables_api.c
@@ -3291,7 +3291,7 @@ static const struct nft_set_type *nft_set_types[] = {
 	&nft_set_rhash_type,
 	&nft_set_bitmap_type,
 	&nft_set_rbtree_type,
-#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2)
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
 	&nft_set_pipapo_avx2_type,
 #endif
 	&nft_set_pipapo_type,
2 changes: 1 addition & 1 deletion net/netfilter/nft_set_pipapo.c
@@ -2201,7 +2201,7 @@ const struct nft_set_type nft_set_pipapo_type = {
 	},
 };
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_AS_AVX2)
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
 const struct nft_set_type nft_set_pipapo_avx2_type = {
 	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT |
 			  NFT_SET_TIMEOUT,
4 changes: 2 additions & 2 deletions net/netfilter/nft_set_pipapo_avx2.h
@@ -1,14 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 #ifndef _NFT_SET_PIPAPO_AVX2_H
 
-#ifdef CONFIG_AS_AVX2
+#if defined(CONFIG_X86_64) && !defined(CONFIG_UML)
 #include <asm/fpu/xstate.h>
 #define NFT_PIPAPO_ALIGN	(XSAVE_YMM_SIZE / BITS_PER_BYTE)
 
 bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
 			    const u32 *key, const struct nft_set_ext **ext);
 bool nft_pipapo_avx2_estimate(const struct nft_set_desc *desc, u32 features,
 			      struct nft_set_estimate *est);
-#endif /* CONFIG_AS_AVX2 */
+#endif /* defined(CONFIG_X86_64) && !defined(CONFIG_UML) */
 
 #endif /* _NFT_SET_PIPAPO_AVX2_H */
