Skip to content

Commit

Permalink
Merge branch 'netdevsim-link'
Browse files Browse the repository at this point in the history
David Wei says:

====================
netdevsim: link and forward skbs between ports

This patchset adds the ability to link two netdevsim ports together and
forward skbs between them, similar to veth. The goal is to use netdevsim
for testing features e.g. zero copy Rx using io_uring.

This feature was tested locally on QEMU, and a selftest is included.

I ran netdev selftests CI style and all tests but the following passed:
- gro.sh
- l2tp.sh
- ip_local_port_range.sh

gro.sh fails because virtme-ng mounts as read-only and it tries to write
to log.txt. This issue was reported to virtme-ng upstream.

l2tp.sh and ip_local_port_range.sh both fail for me on net-next/main as
well.

---
v13->v14:
- implement ndo_get_iflink()
- fix returning 0 if peer is already linked during linking or not linked
  during unlinking
- bump dropped counter if nsim_ipsec_tx() fails and generally reorder
  nsim_start_xmit()
- fix overflowing lines and indentations

v12->v13:
- wait for socat listening port to be ready before sending data in
  selftest

v11->v12:
- fix leaked netns refs
- fix rtnetlink.sh kci_test_ipsec_offload() selftest

v10->v11:
- add udevadm settle after creating netdevsims in selftest

v9->v10:
- fix not freeing skb when there is no peer
- prevent possible id clashes in selftest
- cleanup selftest on error paths

v8->v9:
- switch to getting netns using fd rather than id
- prevent linking a netdevsim to itself
- update tests

v7->v8:
- fix not dereferencing RCU ptr using rcu_dereference()
- remove unused variables in selftest

v6->v7:
- change link syntax to netnsid:ifidx
- replace dev_get_by_index() with __dev_get_by_index()
- check for NULL peer when linking
- add a sysfs attribute for unlinking
- only update Tx stats if not dropped
- update selftest

v5->v6:
- reworked to link two netdevsims using sysfs attribute on the bus
  device instead of debugfs due to deadlock possibility if a netdevsim
  is removed during linking
- removed unnecessary patch maintaining a list of probed nsim_devs
- updated selftest

v4->v5:
- reduce nsim_dev_list_lock critical section
- fixed missing mutex unlock during unwind ladder
- rework nsim_dev_peer_write synchronization to take devlink lock as
  well as rtnl_lock
- return err msgs to user during linking if port doesn't exist or
  linking to self
- update tx stats outside of RCU lock

v3->v4:
- maintain a mutex protected list of probed nsim_devs instead of using
  nsim_bus_dev
- fixed synchronization issues by taking rtnl_lock
- track tx_dropped skbs

v2->v3:
- take lock when traversing nsim_bus_dev_list
- take device ref when getting a nsim_bus_dev
- return 0 if nsim_dev_peer_read cannot find the port
- address code formatting
- do not hard code values in selftests
- add Makefile for selftests

v1->v2:
- renamed debugfs file from "link" to "peer"
- replaced strsep() with sscanf() for consistency
- increased char[] buf sz to 22 for copying id + port from user
- added err msg w/ expected fmt when linking as a hint to user
- prevent linking port to itself
- protect peer ptr using RCU

====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Mar 1, 2024
2 parents e2d890a + 8ee60f9 commit 76f06cb
Show file tree
Hide file tree
Showing 6 changed files with 342 additions and 5 deletions.
145 changes: 145 additions & 0 deletions drivers/net/netdevsim/bus.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,154 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count)
}
static BUS_ATTR_WO(del_device);

/*
 * Store handler for the write-only "link_device" bus attribute.
 *
 * Expects "netnsfd_a:ifidx_a netnsfd_b:ifidx_b" in @buf. Each netns fd is
 * resolved to a struct net, and both interfaces are looked up by ifindex
 * while holding rtnl_lock. The two devices must both be netdevsims, must be
 * distinct, and neither may already have a peer.
 *
 * Returns @count on success, -EINVAL for malformed input, an unknown netns or
 * device, a non-netdevsim device or a self-link, and -EBUSY when either side
 * is already linked.
 */
static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count)
{
	struct netdevsim *nsim_a, *nsim_b;
	struct net_device *dev_a, *dev_b;
	unsigned int ifidx_a, ifidx_b;
	int netnsfd_a, netnsfd_b;
	struct net *ns_a, *ns_b;
	ssize_t ret = -EINVAL;

	if (sscanf(buf, "%d:%u %d:%u", &netnsfd_a, &ifidx_a, &netnsfd_b,
		   &ifidx_b) != 4) {
		pr_err("Format for linking two devices is \"netnsfd_a:ifidx_a netnsfd_b:ifidx_b\" (int uint int uint).\n");
		return ret;
	}

	ns_a = get_net_ns_by_fd(netnsfd_a);
	if (IS_ERR(ns_a)) {
		pr_err("Could not find netns with fd: %d\n", netnsfd_a);
		return ret;
	}

	ns_b = get_net_ns_by_fd(netnsfd_b);
	if (IS_ERR(ns_b)) {
		pr_err("Could not find netns with fd: %d\n", netnsfd_b);
		put_net(ns_a);
		return ret;
	}

	/* Lookups and peer updates are all done under rtnl_lock. */
	rtnl_lock();

	dev_a = __dev_get_by_index(ns_a, ifidx_a);
	if (!dev_a) {
		pr_err("Could not find device with ifindex %u in netnsfd %d\n",
		       ifidx_a, netnsfd_a);
		goto out_unlock;
	}
	if (!netdev_is_nsim(dev_a)) {
		pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
		       ifidx_a, netnsfd_a);
		goto out_unlock;
	}

	dev_b = __dev_get_by_index(ns_b, ifidx_b);
	if (!dev_b) {
		pr_err("Could not find device with ifindex %u in netnsfd %d\n",
		       ifidx_b, netnsfd_b);
		goto out_unlock;
	}
	if (!netdev_is_nsim(dev_b)) {
		pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
		       ifidx_b, netnsfd_b);
		goto out_unlock;
	}

	if (dev_a == dev_b) {
		pr_err("Cannot link a netdevsim to itself\n");
		goto out_unlock;
	}

	/* From here on, the only failure mode is "already linked". */
	ret = -EBUSY;
	nsim_a = netdev_priv(dev_a);
	nsim_b = netdev_priv(dev_b);

	if (rtnl_dereference(nsim_a->peer)) {
		pr_err("Netdevsim %d:%u is already linked\n", netnsfd_a,
		       ifidx_a);
		goto out_unlock;
	}
	if (rtnl_dereference(nsim_b->peer)) {
		pr_err("Netdevsim %d:%u is already linked\n", netnsfd_b,
		       ifidx_b);
		goto out_unlock;
	}

	/* Publish the link in both directions. */
	rcu_assign_pointer(nsim_a->peer, nsim_b);
	rcu_assign_pointer(nsim_b->peer, nsim_a);
	ret = count;

out_unlock:
	put_net(ns_b);
	put_net(ns_a);
	rtnl_unlock();

	return ret;
}
static BUS_ATTR_WO(link_device);

/*
 * Store handler for the write-only "unlink_device" bus attribute.
 *
 * Expects "netnsfd:ifidx" in @buf. The netns fd is resolved to a struct net
 * and the interface is looked up by ifindex under rtnl_lock. If the device is
 * a netdevsim with a peer, the peer pointers on both sides are cleared.
 *
 * Returns @count on success, or -EINVAL for malformed input, an unknown netns
 * or device, a non-netdevsim device, or a device that has no peer.
 */
static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count)
{
	struct netdevsim *nsim, *peer;
	struct net_device *dev;
	unsigned int ifidx;
	int netnsfd, err;
	struct net *ns;

	/*
	 * netnsfd is a signed int (as in link_device_store and as the error
	 * text below states), so it must be scanned with %d rather than %u.
	 */
	err = sscanf(buf, "%d:%u", &netnsfd, &ifidx);
	if (err != 2) {
		pr_err("Format for unlinking a device is \"netnsfd:ifidx\" (int uint).\n");
		return -EINVAL;
	}

	ns = get_net_ns_by_fd(netnsfd);
	if (IS_ERR(ns)) {
		pr_err("Could not find netns with fd: %d\n", netnsfd);
		return -EINVAL;
	}

	err = -EINVAL;
	rtnl_lock();
	dev = __dev_get_by_index(ns, ifidx);
	if (!dev) {
		pr_err("Could not find device with ifindex %u in netnsfd %d\n",
		       ifidx, netnsfd);
		goto out_put_netns;
	}

	if (!netdev_is_nsim(dev)) {
		pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n",
		       ifidx, netnsfd);
		goto out_put_netns;
	}

	nsim = netdev_priv(dev);
	peer = rtnl_dereference(nsim->peer);
	/* Unlinking a device that has no peer is reported as -EINVAL. */
	if (!peer)
		goto out_put_netns;

	err = 0;
	/* Break the link in both directions. */
	RCU_INIT_POINTER(nsim->peer, NULL);
	RCU_INIT_POINTER(peer->peer, NULL);

out_put_netns:
	put_net(ns);
	rtnl_unlock();

	return !err ? count : err;
}
static BUS_ATTR_WO(unlink_device);

/*
 * NULL-terminated table of the attributes listed for the netdevsim bus:
 * device creation/removal plus the link/unlink controls for pairing two
 * netdevsim ports. ATTRIBUTE_GROUPS(nsim_bus) is applied to this table.
 */
static struct attribute *nsim_bus_attrs[] = {
&bus_attr_new_device.attr,
&bus_attr_del_device.attr,
&bus_attr_link_device.attr,
&bus_attr_unlink_device.attr,
NULL
};
ATTRIBUTE_GROUPS(nsim_bus);
Expand Down
53 changes: 48 additions & 5 deletions drivers/net/netdevsim/netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,35 @@
/*
 * Transmit handler: forward @skb to the linked peer netdevsim, if any.
 *
 * The packet is dropped (and tx_dropped bumped) when nsim_ipsec_tx() returns
 * false, when no peer is linked, or when dev_forward_skb() reports
 * NET_RX_DROP. On successful forwarding tx_packets/tx_bytes are updated.
 * NETDEV_TX_OK is returned in every case.
 */
static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	/*
	 * Snapshot the length up front: the skb is handed off to
	 * dev_forward_skb() below and is not read again afterwards.
	 */
	unsigned int len = skb->len;
	struct netdevsim *peer_ns;

	rcu_read_lock();
	if (!nsim_ipsec_tx(ns, skb))
		goto out_drop_free;

	peer_ns = rcu_dereference(ns->peer);
	if (!peer_ns)
		goto out_drop_free;

	skb_tx_timestamp(skb);
	/*
	 * On NET_RX_DROP the skb is not freed here, only counted
	 * (presumably dev_forward_skb() has already disposed of it).
	 */
	if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
		goto out_drop_cnt;

	rcu_read_unlock();
	u64_stats_update_begin(&ns->syncp);
	ns->tx_packets++;
	ns->tx_bytes += len;
	u64_stats_update_end(&ns->syncp);
	return NETDEV_TX_OK;

out_drop_free:
	dev_kfree_skb(skb);
out_drop_cnt:
	rcu_read_unlock();
	u64_stats_update_begin(&ns->syncp);
	ns->tx_dropped++;
	u64_stats_update_end(&ns->syncp);
	return NETDEV_TX_OK;
}

Expand Down Expand Up @@ -70,6 +87,7 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
start = u64_stats_fetch_begin(&ns->syncp);
stats->tx_bytes = ns->tx_bytes;
stats->tx_packets = ns->tx_packets;
stats->tx_dropped = ns->tx_dropped;
} while (u64_stats_fetch_retry(&ns->syncp, start));
}

Expand Down Expand Up @@ -265,6 +283,21 @@ nsim_set_features(struct net_device *dev, netdev_features_t features)
return 0;
}

/*
 * ndo_get_iflink handler: report the ifindex of the linked peer's netdev,
 * or 0 when this netdevsim has no peer. The peer pointer is read under
 * rcu_read_lock().
 */
static int nsim_get_iflink(const struct net_device *dev)
{
	struct netdevsim *self = netdev_priv(dev);
	struct netdevsim *linked;
	int ifindex = 0;

	rcu_read_lock();
	linked = rcu_dereference(self->peer);
	if (linked)
		ifindex = READ_ONCE(linked->netdev->ifindex);
	rcu_read_unlock();

	return ifindex;
}

static const struct net_device_ops nsim_netdev_ops = {
.ndo_start_xmit = nsim_start_xmit,
.ndo_set_rx_mode = nsim_set_rx_mode,
Expand All @@ -282,6 +315,7 @@ static const struct net_device_ops nsim_netdev_ops = {
.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
.ndo_setup_tc = nsim_setup_tc,
.ndo_set_features = nsim_set_features,
.ndo_get_iflink = nsim_get_iflink,
.ndo_bpf = nsim_bpf,
};

Expand All @@ -302,7 +336,6 @@ static void nsim_setup(struct net_device *dev)
eth_hw_addr_random(dev);

dev->tx_queue_len = 0;
dev->flags |= IFF_NOARP;
dev->flags &= ~IFF_MULTICAST;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
IFF_NO_QUEUE;
Expand Down Expand Up @@ -413,8 +446,13 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
void nsim_destroy(struct netdevsim *ns)
{
struct net_device *dev = ns->netdev;
struct netdevsim *peer;

rtnl_lock();
peer = rtnl_dereference(ns->peer);
if (peer)
RCU_INIT_POINTER(peer->peer, NULL);
RCU_INIT_POINTER(ns->peer, NULL);
unregister_netdevice(dev);
if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
nsim_macsec_teardown(ns);
Expand All @@ -427,6 +465,11 @@ void nsim_destroy(struct netdevsim *ns)
free_netdev(dev);
}

bool netdev_is_nsim(struct net_device *dev)
{
return dev->netdev_ops == &nsim_netdev_ops;
}

static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
Expand Down
3 changes: 3 additions & 0 deletions drivers/net/netdevsim/netdevsim.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ struct netdevsim {

u64 tx_packets;
u64 tx_bytes;
u64 tx_dropped;
struct u64_stats_sync syncp;

struct nsim_bus_dev *nsim_bus_dev;
Expand Down Expand Up @@ -125,11 +126,13 @@ struct netdevsim {
} udp_ports;

struct nsim_ethtool ethtool;
struct netdevsim __rcu *peer;
};

struct netdevsim *
nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port);
void nsim_destroy(struct netdevsim *ns);
bool netdev_is_nsim(struct net_device *dev);

void nsim_ethtool_init(struct netdevsim *ns);

Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/drivers/net/netdevsim/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ TEST_PROGS = devlink.sh \
fib.sh \
hw_stats_l3.sh \
nexthop.sh \
peer.sh \
psample.sh \
tc-mq-visibility.sh \
udp_tunnel_nic.sh \
Expand Down
Loading

0 comments on commit 76f06cb

Please sign in to comment.