diff --git a/INSTALL.Linux b/INSTALL.Linux index c46ab55549e..0d56da65d7c 100644 --- a/INSTALL.Linux +++ b/INSTALL.Linux @@ -234,9 +234,10 @@ Prerequisites section, follow the procedure below to build. whether this is the case. If you know that your particular driver can handle it (for example by testing sending large TCP packets over VLANs) then passing in a value of 1 may improve performance. Modules built for - Linux kernels 2.6.37 and later do not need this and do not have this - parameter. If you do not understand what this means or do not know if - your driver will work, do not set this. + Linux kernels 2.6.37 and later, as well as specially patched versions + of earlier kernels, do not need this and do not have this parameter. If + you do not understand what this means or do not know if your driver + will work, do not set this. 7. Initialize the configuration database using ovsdb-tool, e.g.: diff --git a/acinclude.m4 b/acinclude.m4 index 5843bfa588e..fa6f534deb3 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -211,6 +211,9 @@ AC_DEFUN([OVS_CHECK_LINUX26_COMPAT], [ OVS_GREP_IFELSE([$KSRC26/include/linux/if_link.h], [rtnl_link_stats64]) + OVS_GREP_IFELSE([$KSRC26/include/linux/if_vlan.h], [ADD_ALL_VLANS_CMD], + [OVS_DEFINE([HAVE_VLAN_BUG_WORKAROUND])]) + OVS_CHECK_LOG2_H if cmp -s datapath/linux-2.6/kcompat.h.new \ diff --git a/datapath/vport-netdev.c b/datapath/vport-netdev.c index 11faf8ad11b..2583566b1af 100644 --- a/datapath/vport-netdev.c +++ b/datapath/vport-netdev.c @@ -22,12 +22,15 @@ #include "vport-internal_dev.h" #include "vport-netdev.h" -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) && \ + !defined(HAVE_VLAN_BUG_WORKAROUND) #include static int vlan_tso __read_mostly = 0; module_param(vlan_tso, int, 0644); MODULE_PARM_DESC(vlan_tso, "Enable TSO for VLAN packets"); +#else +#define vlan_tso true #endif /* If the native device stats aren't 64 bit use the vport stats tracking instead. 
*/ @@ -266,6 +269,19 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb) vport_receive(vport, skb); } +static bool dev_supports_vlan_tx(struct net_device *dev) +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37) + /* Software fallback means every device supports vlan_tci on TX. */ + return true; +#elif defined(HAVE_VLAN_BUG_WORKAROUND) + return dev->features & NETIF_F_HW_VLAN_TX; +#else + /* Assume that the driver is buggy. */ + return false; +#endif +} + static int netdev_send(struct vport *vport, struct sk_buff *skb) { struct netdev_vport *netdev_vport = netdev_vport_priv(vport); @@ -274,8 +290,7 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) skb->dev = netdev_vport->dev; forward_ip_summed(skb); -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) - if (vlan_tx_tag_present(skb)) { + if (vlan_tx_tag_present(skb) && !dev_supports_vlan_tx(skb->dev)) { int err; int features = 0; @@ -339,7 +354,6 @@ static int netdev_send(struct vport *vport, struct sk_buff *skb) return 0; vlan_set_tci(skb, 0); } -#endif len = skb->len; dev_queue_xmit(skb); diff --git a/utilities/automake.mk b/utilities/automake.mk index 984b47a941e..b267c02f1d7 100644 --- a/utilities/automake.mk +++ b/utilities/automake.mk @@ -32,7 +32,9 @@ EXTRA_DIST += \ utilities/ovs-save \ utilities/ovs-tcpundump.1.in \ utilities/ovs-tcpundump.in \ + utilities/ovs-vlan-bugs.man \ utilities/ovs-vlan-test.in \ + utilities/ovs-vlan-bug-workaround.8.in \ utilities/ovs-vlan-test.8.in \ utilities/ovs-vsctl.8.in DISTCLEANFILES += \ @@ -65,6 +67,7 @@ man_MANS += \ utilities/ovs-pcap.1 \ utilities/ovs-pki.8 \ utilities/ovs-tcpundump.1 \ + utilities/ovs-vlan-bug-workaround.8.in \ utilities/ovs-vlan-test.8 \ utilities/ovs-vsctl.8 @@ -94,6 +97,10 @@ utilities_ovs_vsctl_SOURCES = utilities/ovs-vsctl.c vswitchd/vswitch-idl.c utilities_ovs_vsctl_LDADD = lib/libopenvswitch.a $(SSL_LIBS) if HAVE_NETLINK +sbin_PROGRAMS += utilities/ovs-vlan-bug-workaround 
+utilities_ovs_vlan_bug_workaround_SOURCES = utilities/ovs-vlan-bug-workaround.c +utilities_ovs_vlan_bug_workaround_LDADD = lib/libopenvswitch.a + noinst_PROGRAMS += utilities/nlmon utilities_nlmon_SOURCES = utilities/nlmon.c utilities_nlmon_LDADD = lib/libopenvswitch.a diff --git a/utilities/ovs-vlan-bug-workaround.8.in b/utilities/ovs-vlan-bug-workaround.8.in new file mode 100644 index 00000000000..d05fe938bab --- /dev/null +++ b/utilities/ovs-vlan-bug-workaround.8.in @@ -0,0 +1,90 @@ +.\" -*- nroff -*- +.de IQ +. br +. ns +. IP "\\$1" +.. +.TH ovs\-vlan\-bug\-workaround 8 "February 2011" "Open vSwitch" "Open vSwitch Manual" +.ds PN ovs\-vlan\-bug\-workaround +. +.SH NAME +ovs\-vlan\-bug\-workaround \- utility for configuring Linux VLAN driver bug workaround +. +.SH SYNOPSIS +\fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBon\fR +.br +\fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBoff\fR +.br +\fBovs\-vlan\-bug\-workaround \-\-help +.br +\fBovs\-vlan\-bug\-workaround \-\-version +.SH DESCRIPTION +. +.PP +Some Linux network drivers support a feature called ``VLAN +acceleration''. VLAN acceleration is associated with a data structure +called a \fBvlan_group\fR that is, abstractly, a dictionary that maps +from a VLAN ID (in the range 0 to 4095) to a VLAN device, that is, a +Linux network device associated with a particular VLAN, +e.g. \fBeth0.9\fR for VLAN 9 on \fBeth0\fR. +.PP +Some drivers that support VLAN acceleration have bugs that fall +roughly into the categories listed below. \fBovs\-vlan\-test\fR(8) +can test for these driver bugs. +.so utilities/ovs-vlan-bugs.man +.PP +.PP +The correct long term solution is to fix these driver bugs. +.PP +For now, \fBovs\-vlan\-bug\-workaround\fR can enable a special-purpose +workaround for devices with buggy VLAN acceleration. A kernel patch +must be applied for this workaround to work. 
+.PP +Use the command \fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBon\fR to +enable the VLAN driver bug workaround for network device \fInetdev\fR. +Use the command \fBovs\-vlan\-bug\-workaround \fInetdev\fR \fBoff\fR to +disable the VLAN driver bug workaround for network device \fInetdev\fR. +.SH "DRIVER DETAILS" +.PP +The following drivers in Linux version +2.6.32.12-0.7.1.xs1.0.0.311.170586 implement VLAN acceleration and are +relevant to Open vSwitch on XenServer. We have not tested any version +of most of these drivers, so we do not know whether they have a VLAN +problem that needs to be fixed. The drivers are listed by the name +that they report in, e.g., \fBethtool \-i\fR output; in a few cases +this differs slightly from the name of the module's \fB.ko\fR file: +. +.nf +.ta T 1i +\fB8139cp acenic amd8111e atl1c ATL1E atl1 +atl2 be2net bna bnx2 bnx2x cnic +cxgb cxgb3 e1000 e1000e enic forcedeth +igb igbvf ixgb ixgbe jme ml4x_core +ns83820 qlge r8169 S2IO sky2 starfire +tehuti tg3 typhoon via-velocity vxge +.fi +.PP +The following drivers use \fBvlan_group\fR but are irrelevant to Open +vSwitch on XenServer: +.IP "\fBbonding\fR" +Not used with Open vSwitch on XenServer. +.IP "\fBgianfar\fR" +Not shipped with XenServer. A FreeScale CPU-integrated device. +.IP "\fBehea\fR" +Cannot be built on x86. IBM Power architecture only. +.IP "\fBstmmac\fR" +Cannot be built on x86. SH4 architecture only. +.IP "\fBvmxnet3\fR" +Not shipped with XenServer. For use inside VMware VMs only. +. +.SH OPTIONS +. +.so lib/common.man +. +.SH BUGS +. +Obviously. +. +.SH "SEE ALSO" +. +.BR ovs\-vlan\-test (8). diff --git a/utilities/ovs-vlan-bug-workaround.c b/utilities/ovs-vlan-bug-workaround.c new file mode 100644 index 00000000000..54316ddc7db --- /dev/null +++ b/utilities/ovs-vlan-bug-workaround.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2011 Nicira Networks. 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include <getopt.h>
+#include <linux/if_vlan.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#include "command-line.h"
+#include "util.h"
+
+#define ADD_ALL_VLANS_CMD 10
+#define DEL_ALL_VLANS_CMD 11
+
+static void usage(void);
+static void parse_options(int argc, char *argv[]);
+
+int
+main(int argc, char *argv[])
+{
+    struct vlan_ioctl_args vlan_args;
+    const char *netdev, *setting;
+    int fd;
+
+    set_program_name(argv[0]);
+
+    parse_options(argc, argv);
+    if (argc - optind != 2) {
+        ovs_fatal(0, "exactly two non-option arguments are required "
+                  "(use --help for help)");
+    }
+
+    memset(&vlan_args, 0, sizeof vlan_args);
+
+    /* Get command. */
+    setting = argv[optind + 1];
+    if (!strcmp(setting, "on")) {
+        vlan_args.cmd = ADD_ALL_VLANS_CMD;
+    } else if (!strcmp(setting, "off")) {
+        vlan_args.cmd = DEL_ALL_VLANS_CMD;
+    } else {
+        ovs_fatal(0, "second command line argument must be \"on\" or \"off\" "
+                  "(not \"%s\")", setting);
+    }
+
+    /* Get network device name. */
+    netdev = argv[optind];
+    if (strlen(netdev) >= IFNAMSIZ) {
+        ovs_fatal(0, "%s: network device name too long", netdev);
+    }
+    strcpy(vlan_args.device1, netdev);
+
+    /* Execute operation. */
+    fd = socket(AF_INET, SOCK_STREAM, 0);
+    if (fd < 0) {
+        ovs_fatal(errno, "socket creation failed");
+    }
+    if (ioctl(fd, SIOCSIFVLAN, &vlan_args) < 0) {
+        if (errno == ENOPKG) {
+            ovs_fatal(0, "operation failed (8021q module not loaded)");
+        } else if (errno == EOPNOTSUPP) {
+            ovs_fatal(0, "operation failed (kernel does not support the "
+                      "VLAN bug workaround)");
+        } else {
+            ovs_fatal(errno, "operation failed");
+        }
+    }
+    close(fd);
+
+    return 0;
+}
+
+static void
+usage(void)
+{
+    printf("\
+%s, for enabling or disabling the kernel VLAN bug workaround\n\
+usage: %s NETDEV SETTING\n\
+where NETDEV is a network device (e.g. \"eth0\")\n\
+  and SETTING is \"on\" to enable the workaround or \"off\" to disable it.\n\
+\n\
+Options:\n\
+  -h, --help         Print this helpful information\n\
+  -V, --version      Display version information\n",
+           program_name, program_name);
+    exit(EXIT_SUCCESS);
+}
+
+static void
+parse_options(int argc, char *argv[])
+{
+    static const struct option long_options[] = {
+        {"help", no_argument, NULL, 'h'},
+        {"version", no_argument, NULL, 'V'},
+        {0, 0, 0, 0},
+    };
+    char *short_options = long_options_to_short_options(long_options);
+
+    for (;;) {
+        int option;
+
+        option = getopt_long(argc, argv, short_options, long_options, NULL);
+        if (option == -1) {
+            break;
+        }
+        switch (option) {
+        case 'h':
+            usage();
+            break;
+
+        case 'V':
+            OVS_PRINT_VERSION(0, 0);
+            exit(EXIT_SUCCESS);
+
+        case '?':
+            exit(EXIT_FAILURE);
+
+        default:
+            NOT_REACHED();
+        }
+    }
+    free(short_options);
+}
diff --git a/utilities/ovs-vlan-bugs.man b/utilities/ovs-vlan-bugs.man
new file mode 100644
index 00000000000..bdca8fcc3dc
--- /dev/null
+++ b/utilities/ovs-vlan-bugs.man
@@ -0,0 +1,17 @@
+.IP \(bu
+When NICs use VLAN stripping on receive they must pass a pointer to a
+\fBvlan_group\fR when reporting the stripped tag to the networking
+core.  If no \fBvlan_group\fR is in use then some drivers just drop
+the extracted tag.
Drivers are supposed to only enable stripping if a +\fBvlan_group\fR is registered but not all of them do that. +. +.IP \(bu +Some drivers size their receive buffers based on whether a +\fBvlan_group\fR is enabled, meaning that a maximum size packet with a +VLAN tag will not fit if no \fBvlan_group\fR is configured. +. +.IP \(bu +On transmit, some drivers expect that VLAN acceleration will be used +if it is available, which can only be done if a \fBvlan_group\fR is +configured. In these cases, the driver may fail to parse the packet +and correctly setup checksum offloading or TSO. diff --git a/utilities/ovs-vlan-test.8.in b/utilities/ovs-vlan-test.8.in index fbf1552f079..602d785d744 100644 --- a/utilities/ovs-vlan-test.8.in +++ b/utilities/ovs-vlan-test.8.in @@ -18,23 +18,7 @@ client mode connecting to an \fBovs\-vlan\-test\fR server. \fBovs\-vlan\-test\fR will display "OK" if it did not detect problems. .PP Some examples of the types of problems that may be encountered are: -.IP \(bu -When NICs use vlan stripping on receive they must pass a pointer to -a vlan group when reporting the stripped tag to the networking core. -If there is no vlan group in use then some drivers just drop the -extracted tag. Drivers are supposed to only enable stripping if a -vlan group is registered but not all of them do that. -. -.IP \(bu -Some drivers size their receive buffers based on whether a vlan -group is enabled, meaning that a maximum size packet with a vlan tag -will not fit if a vlan group is not configured. -. -.IP \(bu -On transmit some drivers expect that vlan acceleration will be used -if it is available (which can only be done if a vlan group is -configured). In these cases, the driver may fail to parse the packet -and correctly setup checksum offloading and/or TSO. +.so utilities/ovs-vlan-bugs.man . 
.SS "Client Mode" An \fBovs\-vlan\-test\fR client may be run on a host to check for VLAN diff --git a/xenserver/openvswitch-xen.spec b/xenserver/openvswitch-xen.spec index ab06a06b5bb..7f2cabd1dc5 100644 --- a/xenserver/openvswitch-xen.spec +++ b/xenserver/openvswitch-xen.spec @@ -349,6 +349,7 @@ fi /usr/share/openvswitch/scripts/xen-bugtool-tc-class-show /usr/share/openvswitch/scripts/ovs-save /usr/share/openvswitch/vswitch.ovsschema +/usr/sbin/ovs-vlan-bug-workaround /usr/sbin/ovs-vswitchd /usr/sbin/ovsdb-server /usr/bin/ovs-appctl @@ -371,6 +372,7 @@ fi /usr/share/man/man8/ovs-parse-leaks.8.gz /usr/share/man/man1/ovs-pcap.1.gz /usr/share/man/man1/ovs-tcpundump.1.gz +/usr/share/man/man8/ovs-vlan-bug-workaround.8.gz /usr/share/man/man8/ovs-vlan-test.8.gz /usr/share/man/man8/ovs-vsctl.8.gz /usr/share/man/man8/ovs-vswitchd.8.gz diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigure.py b/xenserver/opt_xensource_libexec_InterfaceReconfigure.py index 0fd79e69e16..7a2fe4cb082 100644 --- a/xenserver/opt_xensource_libexec_InterfaceReconfigure.py +++ b/xenserver/opt_xensource_libexec_InterfaceReconfigure.py @@ -284,6 +284,7 @@ def _otherconfig_from_xml(n, attrs): _PIF_OTHERCONFIG_ATTRS = [ 'domain', 'peerdns', 'defaultroute', 'mtu', 'static-routes' ] + \ [ 'bond-%s' % x for x in 'mode', 'miimon', 'downdelay', 'updelay', 'use_carrier', 'hashing-algorithm' ] + \ + [ 'vlan-bug-workaround' ] + \ _ETHTOOL_OTHERCONFIG_ATTRS _PIF_ATTRS = { 'uuid': (_str_to_xml,_str_from_xml), diff --git a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py index 6c9e3fa8c49..697df5f10cb 100644 --- a/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py +++ b/xenserver/opt_xensource_libexec_InterfaceReconfigureVswitch.py @@ -1,4 +1,4 @@ -# Copyright (c) 2008,2009 Citrix Systems, Inc. +# Copyright (c) 2008,2009,2011 Citrix Systems, Inc. # Copyright (c) 2009,2010,2011 Nicira Networks. 
# # This program is free software; you can redistribute it and/or modify @@ -38,6 +38,49 @@ def netdev_up(netdev, mtu=None): run_command(["/sbin/ifconfig", netdev, 'up'] + mtu) +# This is a list of drivers that do support VLAN tx or rx acceleration, but +# to which the VLAN bug workaround should not be applied. This could be +# because these are known-good drivers (that is, they do not have any of +# the bugs that the workaround avoids) or because the VLAN bug workaround +# will not work for them and may cause other problems. +# +# This is a very short list because few drivers have been tested. +NO_VLAN_WORKAROUND_DRIVERS = ( + "bonding", +) +def netdev_get_driver_name(netdev): + """Returns the name of the driver for network device 'netdev'""" + symlink = '%s/sys/class/net/%s/device/driver' % (root_prefix(), netdev) + try: + target = os.readlink(symlink) + except OSError, e: + log("%s: could not read netdev's driver name (%s)" % (netdev, e)) + return None + + slash = target.rfind('/') + if slash < 0: + log("target %s of symbolic link %s does not contain slash" + % (target, symlink)) + return None + + return target[slash + 1:] + +def netdev_get_features(netdev): + """Returns the features bitmap for the driver for 'netdev'. 
+ The features bitmap is a set of NETIF_F_ flags supported by its driver.""" + try: + features = open("%s/sys/class/net/%s/features" % (root_prefix(), netdev)).read().strip() + return int(features, 0) + except: + return 0 # interface prolly doesn't exist + +def netdev_has_vlan_accel(netdev): + """Returns True if 'netdev' supports VLAN acceleration, False otherwise.""" + NETIF_F_HW_VLAN_TX = 128 + NETIF_F_HW_VLAN_RX = 256 + NETIF_F_VLAN = NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX + return (netdev_get_features(netdev) & NETIF_F_VLAN) != 0 + # # PIF miscellanea # @@ -545,6 +588,20 @@ def configure(self): if len(offload): run_command(['/sbin/ethtool', '-K', dev] + offload) + driver = netdev_get_driver_name(dev) + if 'vlan-bug-workaround' in oc: + vlan_bug_workaround = oc['vlan-bug-workaround'] == 'true' + elif driver in NO_VLAN_WORKAROUND_DRIVERS: + vlan_bug_workaround = False + else: + vlan_bug_workaround = netdev_has_vlan_accel(dev) + + if vlan_bug_workaround: + setting = 'on' + else: + setting = 'off' + run_command(['/usr/sbin/ovs-vlan-bug-workaround', dev, setting]) + datapath_modify_config(self._vsctl_argv) def post(self):