Skip to content

Commit

Permalink
odp-execute: Add ISA implementation of pop_vlan action.
Browse files Browse the repository at this point in the history
This commit adds the AVX512 implementation of the
pop_vlan action.

Signed-off-by: Emma Finn <[email protected]>
Signed-off-by: Harry van Haaren <[email protected]>
Co-authored-by: Harry van Haaren <[email protected]>
Acked-by: Sunil Pai G <[email protected]>
Acked-by: Eelco Chaudron <[email protected]>
Signed-off-by: Ian Stokes <[email protected]>
  • Loading branch information
2 people authored and istokes committed Jul 15, 2022
1 parent 529af67 commit 398f80f
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 1 deletion.
4 changes: 4 additions & 0 deletions lib/automake.mk
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ lib_libopenvswitchavx512_la_CFLAGS += \
lib_libopenvswitchavx512_la_SOURCES += \
lib/dpif-netdev-extract-avx512.c \
lib/dpif-netdev-lookup-avx512-gather.c
if HAVE_GCC_AVX512VL_GOOD
lib_libopenvswitchavx512_la_SOURCES += \
lib/odp-execute-avx512.c
endif # HAVE_GCC_AVX512VL_GOOD
endif # HAVE_AVX512VL
endif # HAVE_AVX512BW
lib_libopenvswitchavx512_la_LDFLAGS = \
Expand Down
187 changes: 187 additions & 0 deletions lib/odp-execute-avx512.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
/*
* Copyright (c) 2022 Intel.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifdef __x86_64__
/* Sparse cannot handle the AVX512 instructions. */
#if !defined(__CHECKER__)

#include <config.h>
#include <errno.h>

#include "dp-packet.h"
#include "immintrin.h"
#include "odp-execute-private.h"
#include "odp-netlink.h"
#include "openvswitch/vlog.h"

VLOG_DEFINE_THIS_MODULE(odp_execute_avx512);

/* The below three build asserts make sure that l2_5_ofs, l3_ofs, and l4_ofs
* fields remain in the same order and offset to l2_padd_size. This is needed
* as the avx512_dp_packet_resize_l2() function will manipulate those fields at
* a fixed memory index based on the l2_padd_size offset. */
BUILD_ASSERT_DECL(offsetof(struct dp_packet, l2_pad_size) +
MEMBER_SIZEOF(struct dp_packet, l2_pad_size) ==
offsetof(struct dp_packet, l2_5_ofs));

BUILD_ASSERT_DECL(offsetof(struct dp_packet, l2_5_ofs) +
MEMBER_SIZEOF(struct dp_packet, l2_5_ofs) ==
offsetof(struct dp_packet, l3_ofs));

BUILD_ASSERT_DECL(offsetof(struct dp_packet, l3_ofs) +
MEMBER_SIZEOF(struct dp_packet, l3_ofs) ==
offsetof(struct dp_packet, l4_ofs));

/* The below build assert makes sure it's safe to read/write 128-bits starting
* at the l2_pad_size location. */
BUILD_ASSERT_DECL(sizeof(struct dp_packet) -
offsetof(struct dp_packet, l2_pad_size) >= sizeof(__m128i));

static inline void ALWAYS_INLINE
avx512_dp_packet_resize_l2(struct dp_packet *b, int resize_by_bytes)
{
/* Update packet size/data pointers, same as the scalar implementation. */
if (resize_by_bytes >= 0) {
dp_packet_push_uninit(b, resize_by_bytes);
} else {
dp_packet_pull(b, -resize_by_bytes);
}

/* The next step is to update the l2_5_ofs, l3_ofs and l4_ofs fields which
* the scalar implementation does with the dp_packet_adjust_layer_offset()
* function. */

/* Set the v_zero register to all zero's. */
const __m128i v_zeros = _mm_setzero_si128();

/* Set the v_u16_max register to all one's. */
const __m128i v_u16_max = _mm_cmpeq_epi16(v_zeros, v_zeros);

/* Each lane represents 16 bits in a 12-bit register. In this case the
* first three 16-bit values, which will map to the l2_5_ofs, l3_ofs and
* l4_ofs fields. */
const uint8_t k_lanes = 0b1110;

/* Set all 16-bit words in the 128-bits v_offset register to the value we
* need to add/substract from the l2_5_ofs, l3_ofs, and l4_ofs fields. */
__m128i v_offset = _mm_set1_epi16(abs(resize_by_bytes));

/* Load 128 bits from the dp_packet structure starting at the l2_pad_size
* offset. */
void *adjust_ptr = &b->l2_pad_size;
__m128i v_adjust_src = _mm_loadu_si128(adjust_ptr);

/* Here is the tricky part, we only need to update the value of the three
* fields if they are not UINT16_MAX. The following function will return
* a mask of lanes (read fields) that are not UINT16_MAX. It will do this
* by comparing only the lanes we requested, k_lanes, and if they match
* v_u16_max, the bit will be set. */
__mmask8 k_cmp = _mm_mask_cmpneq_epu16_mask(k_lanes, v_adjust_src,
v_u16_max);

/* Based on the bytes adjust (positive, or negative) it will do the actual
* add or subtraction. These functions will only operate on the lanes
* (fields) requested based on k_cmp, i.e:
* k_cmp = [l2_5_ofs, l3_ofs, l4_ofs]
* for field in kcmp
* v_adjust_src[field] = v_adjust_src[field] + v_offset
*/
__m128i v_adjust_wip;

if (resize_by_bytes >= 0) {
v_adjust_wip = _mm_mask_add_epi16(v_adjust_src, k_cmp,
v_adjust_src, v_offset);
} else {
v_adjust_wip = _mm_mask_sub_epi16(v_adjust_src, k_cmp,
v_adjust_src, v_offset);
}

/* Here we write back the full 128-bits. */
_mm_storeu_si128(adjust_ptr, v_adjust_wip);
}

/* This function performs the same operation on each packet in the batch as
* the scalar eth_pop_vlan() function. */
static void
action_avx512_pop_vlan(struct dp_packet_batch *batch,
const struct nlattr *a OVS_UNUSED)
{
struct dp_packet *packet;

/* Set the v_zero register to all zero's. */
const __m128i v_zeros = _mm_setzero_si128();

DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
struct vlan_eth_header *veh = dp_packet_eth(packet);

if (veh && dp_packet_size(packet) >= sizeof *veh &&
eth_type_vlan(veh->veth_type)) {

/* Load the first 128-bits of l2 header into the v_ether register.
* This result in the veth_dst/src and veth_type/tci of the
* vlan_eth_header structure to be loaded. */
__m128i v_ether = _mm_loadu_si128((void *) veh);

/* This creates a 256-bit value containing the first four fields
* of the vlan_eth_header plus 128 zero-bit. The result will be the
* lowest 128-bits after the right shift, hence we shift the data
* 128(zero)-bits minus the VLAN_HEADER_LEN, so we are left with
* only the veth_dst and veth_src fields. */
__m128i v_realign = _mm_alignr_epi8(v_ether, v_zeros,
sizeof(__m128i) -
VLAN_HEADER_LEN);

/* Write back the modified ethernet header. */
_mm_storeu_si128((void *) veh, v_realign);

/* As we removed the VLAN_HEADER we now need to adjust all the
* offsets. */
avx512_dp_packet_resize_l2(packet, -VLAN_HEADER_LEN);
}
}
}

int
action_avx512_init(struct odp_execute_action_impl *self OVS_UNUSED)
{
if (!action_avx512_isa_probe()) {
return -ENOTSUP;
}

/* Set function pointers for actions that can be applied directly, these
* are identified by OVS_ACTION_ATTR_*. */
self->funcs[OVS_ACTION_ATTR_POP_VLAN] = action_avx512_pop_vlan;
return 0;
}

#endif /* Sparse */

#else /* __x86_64__ */

#include <config.h>
#include <errno.h>
#include "odp-execute-private.h"
/* Function itself is required to be called, even in e.g. 32-bit builds.
* This dummy init function ensures 32-bit builds succeed too.
*/

int
action_avx512_init(struct odp_execute_action_impl *self OVS_UNUSED)
{
return -ENOTSUP;
}

#endif
32 changes: 31 additions & 1 deletion lib/odp-execute-private.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <stdio.h>
#include <string.h>

#include "cpu.h"
#include "dpdk.h"
#include "dp-packet.h"
#include "odp-execute-private.h"
Expand All @@ -29,6 +30,35 @@
VLOG_DEFINE_THIS_MODULE(odp_execute_impl);
static int active_action_impl_index;

#if ACTION_IMPL_AVX512_CHECK
/* Probe functions to check ISA requirements. */
bool
action_avx512_isa_probe(void)
{
static enum ovs_cpu_isa isa_required[] = {
OVS_CPU_ISA_X86_AVX512F,
OVS_CPU_ISA_X86_AVX512BW,
OVS_CPU_ISA_X86_BMI2,
OVS_CPU_ISA_X86_AVX512VL,
};
for (int i = 0; i < ARRAY_SIZE(isa_required); i++) {
if (!cpu_has_isa(isa_required[i])) {
return false;
}
}
return true;
}

#else

bool
action_avx512_isa_probe(void)
{
return false;
}

#endif

static struct odp_execute_action_impl action_impls[] = {
[ACTION_IMPL_AUTOVALIDATOR] = {
.available = false,
Expand All @@ -46,7 +76,7 @@ static struct odp_execute_action_impl action_impls[] = {
[ACTION_IMPL_AVX512] = {
.available = false,
.name = "avx512",
.init_func = NULL,
.init_func = action_avx512_init,
},
#endif
};
Expand Down
4 changes: 4 additions & 0 deletions lib/odp-execute-private.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ BUILD_ASSERT_DECL(ACTION_IMPL_AUTOVALIDATOR == 1);

#define ACTION_IMPL_BEGIN (ACTION_IMPL_AUTOVALIDATOR + 1)

bool action_avx512_isa_probe(void);

/* Odp execute init handles setting up the state of the actions functions at
* initialization time. It cannot return errors, as it must always succeed in
* initializing the scalar/generic codepath. */
Expand All @@ -90,6 +92,8 @@ struct odp_execute_action_impl * odp_execute_action_set(const char *name);

int action_autoval_init(struct odp_execute_action_impl *self);

int action_avx512_init(struct odp_execute_action_impl *self);

void odp_execute_action_get_info(struct ds *name);

#endif /* ODP_EXTRACT_PRIVATE */

0 comments on commit 398f80f

Please sign in to comment.