Skip to content

Commit

Permalink
samples/bpf: xdp2skb_meta shows transferring info from XDP to SKB
Browse files Browse the repository at this point in the history
Creating a bpf sample that shows howto use the XDP 'data_meta'
infrastructure, created by Daniel Borkmann.  Very few drivers support
this feature, but I wanted a functional sample to begin with, when
working on adding driver support.

XDP data_meta is about creating a communication channel between BPF
programs.  This can be XDP tail-progs, but also other SKB based BPF
hooks, like in this case the TC clsact hook. In this sample I show
that XDP can store info named "mark", and TC/clsact chooses to use
this info and store it into the skb->mark.

It is a bit annoying that the XDP and TC samples use different tools/libs
when attaching their BPF hooks.  As the XDP and TC programs need to
cooperate and agree on a struct-layout, it is best/easiest if the two
programs can be contained within the same BPF restricted-C file.

As the bpf-loader, I chose not to use bpf_load.c (or libbpf), but
instead wrote a bash shell script named xdp2skb_meta.sh, which
demonstrates how to use the iproute cmdline tools 'tc' and 'ip' for
loading BPF programs.  To make it easy for first-time users, the shell
script has command line parsing, and supports --verbose and --dry-run
mode, if you just want to see/learn the tc+ip command syntax:

 # ./xdp2skb_meta.sh --dev ixgbe2 --dry-run
 # Dry-run mode: enable VERBOSE and don't call TC+IP
 tc qdisc del dev ixgbe2 clsact
 tc qdisc add dev ixgbe2 clsact
 tc filter add dev ixgbe2 ingress prio 1 handle 1 bpf da obj ./xdp2skb_meta_kern.o sec tc_mark
 # Flush XDP on device: ixgbe2
 ip link set dev ixgbe2 xdp off
 ip link set dev ixgbe2 xdp obj ./xdp2skb_meta_kern.o sec xdp_mark

Signed-off-by: Jesper Dangaard Brouer <[email protected]>
Signed-off-by: Daniel Borkmann <[email protected]>
  • Loading branch information
netoptimizer authored and borkmann committed Jan 11, 2018
1 parent 632130e commit 36e04a2
Show file tree
Hide file tree
Showing 3 changed files with 324 additions and 0 deletions.
1 change: 1 addition & 0 deletions samples/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ always += xdp_redirect_map_kern.o
always += xdp_redirect_cpu_kern.o
always += xdp_monitor_kern.o
always += xdp_rxq_info_kern.o
always += xdp2skb_meta_kern.o
always += syscall_tp_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
Expand Down
220 changes: 220 additions & 0 deletions samples/bpf/xdp2skb_meta.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
#!/bin/bash
#
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
#
# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load
# eBPF programs, both for XDP and clsbpf. Shell script function
# wrappers and even long options parsing is illustrated, for ease of
# use.
#
# Related to samples/bpf/xdp2skb_meta_kern.c, which contains BPF-progs
# that need to collaborate between XDP and TC hooks. Thus, it is
# convenient that the same tool loads both programs that need to work
# together.
#
# Name of the compiled BPF object holding both the XDP and the TC program
BPF_FILE=xdp2skb_meta_kern.o
# Directory this script was started from; $0 is quoted so that a script
# path containing whitespace is not word-split before dirname sees it
DIR=$(dirname "$0")

# Paths of the iproute2 tools, exported for the command wrappers
export TC=/usr/sbin/tc
export IP=/usr/sbin/ip

# Print the command line help text on stdout, framed by one empty line
# before and after. Each printf argument becomes one output line.
usage() {
    printf '%s\n' \
        "" \
        "Usage: $0 [-vfh] --dev ethX" \
        " -d | --dev : Network device (required)" \
        " --flush : Cleanup flush TC and XDP progs" \
        " --list : (\$LIST) List TC and XDP progs" \
        " -v | --verbose : (\$VERBOSE) Verbose" \
        " --dry-run : (\$DRYRUN) Dry-run only (echo commands)" \
        ""
}

## -- General shell logging cmds --
# Print an error message on stderr and terminate the script.
#
# Arguments:
#   $1    - exit code the script terminates with
#   $2... - words of the message, printed after the "ERROR: " prefix
err() {
    local exitcode=$1
    shift
    # "$*" joins the message into ONE word; "$@" embedded in a longer
    # quoted string would split it into several (ShellCheck SC2145)
    printf 'ERROR: %s\n' "$*" >&2
    exit "$exitcode"
}

# Print the arguments as a "# "-prefixed comment line on stdout.
# Silent (and successful) unless $VERBOSE is non-empty.
info() {
    [[ -n "$VERBOSE" ]] || return 0
    echo "# $*"
}

## -- Helper function calls --

# Wrapper call for TC and IP
# - Echoes the command (tool name without its path) when $VERBOSE is set
# - Only echoes, never executes, when $DRYRUN is set
# - Will display the offending command and exit(2) on failure
#
# Arguments:
#   $1    - command to execute, e.g. "$TC" or "$IP"
#   $2    - empty string: a failing command aborts the script via err();
#           any non-empty value: a failing command is tolerated
#   $3... - arguments handed to the command unchanged
_call_cmd() {
    local cmd="$1"
    local allow_fail="$2"
    shift 2
    if [[ -n "$VERBOSE" ]]; then
        # ${cmd##*/} strips the directory part, like basename
        printf '%s\n' "${cmd##*/} $*"
    fi
    if [[ -n "$DRYRUN" ]]; then
        return
    fi
    # Quoted, so a command path containing whitespace stays a single word
    "$cmd" "$@"
    local status=$?
    if (( status != 0 )) && [[ -z "$allow_fail" ]]; then
        err 2 "Exec error($status) occurred cmd: \"$cmd $*\""
    fi
    # A tolerated failure is not propagated to the caller
    return 0
}
# Run the tc tool ($TC) with the given arguments through _call_cmd.
# The empty second argument means a failing command is not tolerated.
call_tc() {
    local -r tolerate=""
    _call_cmd "$TC" "$tolerate" "$@"
}
# Run the tc tool ($TC) with the given arguments through _call_cmd,
# passing a non-empty allow_fail flag so a failing command is tolerated.
call_tc_allow_fail() {
    local -r tolerate="allow_fail"
    _call_cmd "$TC" "$tolerate" "$@"
}
# Run the ip tool ($IP) with the given arguments through _call_cmd.
# The empty second argument means a failing command is not tolerated.
call_ip() {
    local -r tolerate=""
    _call_cmd "$IP" "$tolerate" "$@"
}

## --- Parse command line arguments / parameters ---
# The external program "getopt" is used, as it understands --long-options.
# Its output is a re-quoted argument list that is installed as the new
# positional parameters via "eval set --".
if ! OPTIONS=$(getopt -o vfhd: \
        --long verbose,flush,help,list,dev:,dry-run -- "$@"); then
    err 4 "Error calling getopt"
fi
eval set -- "$OPTIONS"

# DEV and FLUSH are always reset; VERBOSE, DRYRUN and LIST are only ever
# set here, so values already present in the environment stay in effect.
unset DEV FLUSH
while :; do
    case "$1" in
        -h | --help)
            usage
            exit 0
            ;;
        -v | --verbose)
            VERBOSE=yes
            shift
            ;;
        --dry-run)
            # Dry-run switches verbose on as well, so the commands get echoed
            DRYRUN=yes
            VERBOSE=yes
            info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2
            shift
            ;;
        -d | --dev)
            # Option takes a value, hence two words are consumed
            DEV=$2
            info "Device set to: DEV=$DEV" >&2
            shift 2
            ;;
        -f | --flush)
            FLUSH=yes
            shift
            ;;
        --list)
            LIST=yes
            shift
            ;;
        --)
            # End-of-options marker
            shift
            break
            ;;
        *)
            shift
            break
            ;;
    esac
done

# Full path of the BPF object file, built from $DIR and $BPF_FILE
FILE="${DIR}/${BPF_FILE}"
[[ -e $FILE ]] || err 3 "Missing BPF object file ($FILE)"

# No device was given on the command line: show the help text, then fail
if [[ -z $DEV ]]; then
    usage
    err 2 "Please specify network device -- required option --dev"
fi

## -- Function calls --

# Show the TC filters attached to the ingress hook of a device.
#
# Arguments:
#   $1 - network device name
list_tc() {
    local device="$1"
    info "Listing current TC ingress rules"
    # Quoted, so the device name always reaches tc as exactly one argument
    call_tc filter show dev "$device" ingress
}

# Show the XDP related lines of "ip link show" for a device.
# The return status is that of grep, i.e. non-zero when no line
# mentions xdp.
#
# Arguments:
#   $1 - network device name
list_xdp() {
    local device="$1"
    info "Listing current XDP device($device) setting"
    # Quoted, so the device name always reaches ip as exactly one argument
    call_ip link show dev "$device" | grep --color=auto xdp
}

# Remove the TC ingress filters and the clsact qdisc from a device.
# Both removals are allowed to fail (call_tc_allow_fail).
#
# Arguments:
#   $1 - network device name
flush_tc() {
    local device="$1"
    info "Flush TC on device: $device"
    # Quoted, so the device name always reaches tc as exactly one argument
    call_tc_allow_fail filter del dev "$device" ingress
    call_tc_allow_fail qdisc del dev "$device" clsact
}

# Detach any XDP program from a device ("ip link set ... xdp off").
#
# Arguments:
#   $1 - network device name
flush_xdp() {
    local device="$1"
    info "Flush XDP on device: $device"
    # Quoted, so the device name always reaches ip as exactly one argument
    call_ip link set dev "$device" xdp off
}

# Attach the BPF program from ELF section "tc_mark" to the TC ingress
# hook of a device.
#
# Arguments:
#   $1 - network device name
#   $2 - path of the BPF object file
attach_tc_mark() {
    local device="$1"
    local file="$2"
    local prog="tc_mark"

    # Re-attach clsact to clear/flush existing rules. The delete may fail
    # (e.g. no clsact qdisc present yet), so its error output is discarded.
    call_tc_allow_fail qdisc del dev "$device" clsact 2> /dev/null
    call_tc qdisc add dev "$device" clsact

    # Attach BPF prog; "$file" is quoted so that an object path containing
    # whitespace is handed to tc as a single argument
    call_tc filter add dev "$device" ingress \
        prio 1 handle 1 bpf da obj "$file" sec "$prog"
}

# Attach the BPF program from ELF section "xdp_mark" as the XDP program
# of a device.
#
# Arguments:
#   $1 - network device name
#   $2 - path of the BPF object file
attach_xdp_mark() {
    local device="$1"
    local file="$2"
    local prog="xdp_mark"

    # Remove XDP prog in-case it's already loaded
    # TODO: Need ip-link option to override/replace existing XDP prog
    flush_xdp "$device"

    # Attach XDP/BPF prog; "$file" is quoted so that an object path
    # containing whitespace is handed to ip as a single argument
    call_ip link set dev "$device" xdp obj "$file" sec "$prog"
}

# --flush: detach the TC and XDP programs again, then stop
if [[ -n $FLUSH ]]; then
    flush_tc "$DEV"
    flush_xdp "$DEV"
    exit 0
fi

# --list (or $LIST from the environment): only show what is attached
if [[ -n $LIST ]]; then
    list_tc "$DEV"
    list_xdp "$DEV"
    exit 0
fi

# Default action: load both cooperating programs. The arguments are quoted
# so a device name or object path with whitespace/glob characters is
# passed on as exactly one word each.
attach_tc_mark "$DEV" "$FILE"
attach_xdp_mark "$DEV" "$FILE"
103 changes: 103 additions & 0 deletions samples/bpf/xdp2skb_meta_kern.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/* SPDX-License-Identifier: GPL-2.0
* Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
*
* Example howto transfer info from XDP to SKB, e.g. skb->mark
* -----------------------------------------------------------
* This uses the XDP data_meta infrastructure, and is a cooperation
* between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook.
*
* Notice: This example does not use the BPF C-loader (bpf_load.c),
* but instead relies on the iproute2 TC tool for loading BPF-objects.
*/
#include <uapi/linux/bpf.h>
#include <uapi/linux/pkt_cls.h>

#include "bpf_helpers.h"

/*
 * Layout of the record stored in the XDP 'data_meta' area, which is
 * located just in front of the raw packet payload data. Its meaning is
 * private to the two BPF programs in this file, which use it as a
 * communication channel: the XDP program grows the area via the
 * bpf_xdp_adjust_meta() helper and fills it in, and the TC program uses
 * a boundary check against the packet data pointer to see whether a
 * record has been provided.
 *
 * The struct must be 4 byte aligned, which here is enforced by the
 * struct __attribute__((aligned(4))).
 */
struct meta_info {
__u32 mark; /* value the TC program copies into the SKB mark */
} __attribute__((aligned(4)));

/* XDP program: reserve a struct meta_info in the data_meta area in front
 * of the packet and store the fixed mark 42 in it.
 *
 * Returns XDP_PASS on success, or XDP_ABORTED when the meta area could
 * not be reserved or does not have room for the struct.
 */
SEC("xdp_mark")
int _xdp_mark(struct xdp_md *ctx)
{
	struct meta_info *meta;
	void *data;
	int ret;

	/* Reserve space in-front of data pointer for our meta info.
	 * (Notice drivers not supporting data_meta will fail here!)
	 */
	ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
	if (ret < 0)
		return XDP_ABORTED;

	/* Notice: ctx->data and ctx->data_meta MUST be loaded _after_ the
	 * call to bpf_xdp_adjust_meta(). The helper may change the packet
	 * buffer, so the verifier invalidates every packet pointer that
	 * was loaded before the call and rejects a prog that uses one.
	 */
	data = (void *)(unsigned long)ctx->data;

	/* Check data_meta have room for meta_info struct */
	meta = (void *)(unsigned long)ctx->data_meta;
	if ((void *)(meta + 1) > data)
		return XDP_ABORTED;

	meta->mark = 42;

	return XDP_PASS;
}

/* TC (clsact ingress) program: transfer the mark that the XDP program
 * stored in the data_meta area into the SKB mark.
 *
 * When the data_meta area has no room for a struct meta_info, the SKB is
 * marked with 41 instead, so this case can be told apart from the value
 * 42 written by the XDP program. Always returns TC_ACT_OK.
 */
SEC("tc_mark")
int _tc_mark(struct __sk_buff *ctx)
{
	void *data      = (void *)(unsigned long)ctx->data;
	void *data_meta = (void *)(unsigned long)ctx->data_meta;
	struct meta_info *meta = data_meta;

	/* Check XDP gave us some data_meta */
	if ((void *)(meta + 1) > data) {
		/* No (complete) meta record available: use fallback mark */
		ctx->mark = 41;
		return TC_ACT_OK;
	}

	/* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */
	ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */

	return TC_ACT_OK;
}

/* Manually attaching these programs:
export DEV=ixgbe2
export FILE=xdp2skb_meta_kern.o
# via TC command
tc qdisc del dev $DEV clsact 2> /dev/null
tc qdisc add dev $DEV clsact
tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark
tc filter show dev $DEV ingress
# XDP via IP command:
ip link set dev $DEV xdp off
ip link set dev $DEV xdp obj $FILE sec xdp_mark
# Use iptables to "see" if SKBs are marked
iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29
iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a
# Hint: catch XDP_ABORTED errors via
perf record -e xdp:*
perf script
*/

0 comments on commit 36e04a2

Please sign in to comment.