Skip to content

Commit

Permalink
Initial implementation of sFlow.
Browse files Browse the repository at this point in the history
Tested very slightly with "ping" and "sflowtool -t | tcpdump -r -".
  • Loading branch information
blp committed Jan 4, 2010
1 parent 622ee2c commit 72b0630
Show file tree
Hide file tree
Showing 23 changed files with 984 additions and 37 deletions.
7 changes: 4 additions & 3 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ What is Open vSwitch?
Open vSwitch is a multilayer software switch licensed under the open
source Apache 2 license. Our goal is to implement a production
quality switch platform that supports standard management interfaces
(e.g. NetFlow, RSPAN, ERSPAN, IOS-like CLI), and opens the forwarding
functions to programmatic extension and control.
(e.g. NetFlow, sFlow, RSPAN, ERSPAN, IOS-like CLI), and opens the
forwarding functions to programmatic extension and control.

Open vSwitch is well suited to function as a virtual switch in VM
environments. In addition to exposing standard control and visibility
Expand All @@ -20,7 +20,8 @@ The bulk of the code is written in platform-independent C and is
easily ported to other environments. The current release of Open
vSwitch supports the following features:

* Visibility into inter-VM communication via NetFlow, SPAN, and RSPAN
* Visibility into inter-VM communication via NetFlow, sFlow, SPAN,
and RSPAN
* Standard 802.1Q VLAN model with trunking
* Per VM policing
* NIC bonding with source-MAC load balancing
Expand Down
40 changes: 40 additions & 0 deletions datapath/actions.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,33 @@ output_control(struct datapath *dp, struct sk_buff *skb, u32 arg, gfp_t gfp)
return dp_output_control(dp, skb, _ODPL_ACTION_NR, arg);
}

/* Send a copy of this packet up to the sFlow agent, along with extra
 * information about what happened to it.
 *
 * The sample is built in a private copy of 'skb' and queued on the
 * _ODPL_SFLOW_NR queue.  Its layout is a "struct odp_sflow_sample_header",
 * followed by the 'n_actions' actions in 'a', followed by the packet data.
 *
 * Sampling is best effort: if the copy cannot be allocated with 'gfp' the
 * sample is dropped, and the result of dp_output_control() is ignored. */
static void sflow_sample(struct datapath *dp, struct sk_buff *skb,
			 const union odp_action *a, int n_actions, gfp_t gfp)
{
	struct odp_sflow_sample_header *hdr;
	unsigned int actlen = n_actions * sizeof(union odp_action);
	unsigned int hdrlen = sizeof(struct odp_sflow_sample_header);
	struct sk_buff *nskb;
	int i;

	/* Reserve enough headroom in the copy for the header and actions. */
	nskb = skb_copy_expand(skb, actlen + hdrlen, 0, gfp);
	if (!nskb)
		return;

	/* Push in reverse order so that the header ends up in front of the
	 * actions, which in turn precede the packet data. */
	memcpy(__skb_push(nskb, actlen), a, actlen);
	hdr = (struct odp_sflow_sample_header *)__skb_push(nskb, hdrlen);
	hdr->n_actions = n_actions;
	/* The pushed headroom is uninitialized memory and the whole header is
	 * handed to userspace, so the padding field must be cleared too. */
	hdr->reserved = 0;

	/* The sample pool is kept per CPU; report the datapath-wide total. */
	hdr->sample_pool = 0;
	for_each_possible_cpu (i) {
		const struct dp_stats_percpu *stats;
		stats = per_cpu_ptr(dp->stats_percpu, i);
		hdr->sample_pool += stats->sflow_pool;
	}
	dp_output_control(dp, nskb, _ODPL_SFLOW_NR, 0);
}

/* Execute a list of actions against 'skb'. */
int execute_actions(struct datapath *dp, struct sk_buff *skb,
struct odp_flow_key *key,
Expand All @@ -378,6 +405,19 @@ int execute_actions(struct datapath *dp, struct sk_buff *skb,
* is slightly obscure just to avoid that. */
int prev_port = -1;
int err;

if (dp->sflow_probability) {
/* Increment sample pool. */
int cpu = get_cpu();
per_cpu_ptr(dp->stats_percpu, cpu)->sflow_pool++;
put_cpu();

/* Sample packet. */
if (dp->sflow_probability == UINT_MAX ||
net_random() < dp->sflow_probability)
sflow_sample(dp, skb, a, n_actions, gfp);
}

for (; n_actions > 0; a++, n_actions--) {
WARN_ON_ONCE(skb_shared(skb));
if (prev_port != -1) {
Expand Down
14 changes: 12 additions & 2 deletions datapath/datapath.c
Original file line number Diff line number Diff line change
Expand Up @@ -715,8 +715,7 @@ dp_output_control(struct datapath *dp, struct sk_buff *skb, int queue_no,
int err;

WARN_ON_ONCE(skb_shared(skb));
BUG_ON(queue_no != _ODPL_MISS_NR && queue_no != _ODPL_ACTION_NR);

BUG_ON(queue_no != _ODPL_MISS_NR && queue_no != _ODPL_ACTION_NR && queue_no != _ODPL_SFLOW_NR);
queue = &dp->queues[queue_no];
err = -ENOBUFS;
if (skb_queue_len(queue) >= DP_MAX_QUEUE_LEN)
Expand Down Expand Up @@ -1393,6 +1392,7 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
int dp_idx = iminor(f->f_dentry->d_inode);
struct datapath *dp;
int drop_frags, listeners, port_no;
unsigned int sflow_probability;
int err;

/* Handle commands with special locking requirements up front. */
Expand Down Expand Up @@ -1456,6 +1456,16 @@ static long openvswitch_ioctl(struct file *f, unsigned int cmd,
set_listen_mask(f, listeners);
break;

case ODP_GET_SFLOW_PROBABILITY:
err = put_user(dp->sflow_probability, (unsigned int __user *)argp);
break;

case ODP_SET_SFLOW_PROBABILITY:
err = get_user(sflow_probability, (unsigned int __user *)argp);
if (!err)
dp->sflow_probability = sflow_probability;
break;

case ODP_PORT_QUERY:
err = query_port(dp, (struct odp_port __user *)argp);
break;
Expand Down
29 changes: 26 additions & 3 deletions datapath/datapath.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,15 @@ struct dp_bucket {
struct sw_flow *flows[];
};

#define DP_N_QUEUES 2
#define DP_N_QUEUES 3
#define DP_MAX_QUEUE_LEN 100

/* Per-CPU datapath counters.  struct datapath refers to these through its
 * 'stats_percpu' pointer; a datapath-wide figure is obtained by adding up
 * the copies for every possible CPU, as sflow_sample() does for
 * 'sflow_pool'.
 *
 * NOTE(review): the exact meaning of n_frags, n_hit, n_missed and n_lost is
 * not visible here; confirm against the code that updates them. */
struct dp_stats_percpu {
u64 n_frags;
u64 n_hit;
u64 n_missed;
u64 n_lost;
/* Incremented once per execute_actions() call while the datapath's
 * sflow_probability is nonzero, whether or not the packet is sampled. */
u64 sflow_pool; /* Packets that could have been sampled. */
};

struct dp_port_group {
Expand All @@ -95,10 +96,29 @@ struct dp_port_group {
u16 ports[];
};

/**
* struct datapath - datapath for flow-based packet switching
* @mutex: Mutual exclusion for ioctls.
* @dp_idx: Datapath number (index into the dps[] array in datapath.c).
* @ifobj: &struct kobject representing the datapath.
* @drop_frags: Drop all IP fragments if nonzero.
* @queues: %DP_N_QUEUES sets of queued packets for userspace to handle.
* @waitqueue: Waitqueue, for waiting for new packets in @queues.
* @n_flows: Number of flows currently in flow table.
* @table: Current flow table (RCU protected).
* @groups: Port groups, used by ODPAT_OUTPUT_GROUP action (RCU protected).
* @n_ports: Number of ports currently in @ports.
* @ports: Map from port number to &struct net_bridge_port. %ODPP_LOCAL port
* always exists, other ports may be %NULL.
* @port_list: List of all ports in @ports in arbitrary order.
* @stats_percpu: Per-CPU datapath statistics.
* @sflow_probability: Probability of sampling a packet to the %ODPL_SFLOW
* queue, where 0 means never sample, UINT_MAX means always sample, and
* other values are intermediate probabilities.
*/
struct datapath {
struct mutex mutex;
int dp_idx;

struct kobject ifobj;

int drop_frags;
Expand All @@ -117,10 +137,13 @@ struct datapath {
/* Switch ports. */
unsigned int n_ports;
struct net_bridge_port *ports[DP_MAX_PORTS];
struct list_head port_list; /* All ports, including local_port. */
struct list_head port_list;

/* Stats. */
struct dp_stats_percpu *stats_percpu;

/* sFlow Sampling */
unsigned int sflow_probability;
};

struct net_bridge_port {
Expand Down
26 changes: 24 additions & 2 deletions include/openvswitch/datapath-protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@

#define ODP_EXECUTE _IOR('O', 18, struct odp_execute)

#define ODP_SET_SFLOW_PROBABILITY _IOR('O', 20, int)
#define ODP_GET_SFLOW_PROBABILITY _IOW('O', 21, int)

struct odp_stats {
/* Flows. */
__u32 n_flows; /* Number of flows in flow table. */
Expand All @@ -98,6 +101,7 @@ struct odp_stats {
/* Queues. */
__u16 max_miss_queue; /* Max length of ODPL_MISS queue. */
__u16 max_action_queue; /* Max length of ODPL_ACTION queue. */
__u16 max_sflow_queue; /* Max length of ODPL_SFLOW queue. */
};

/* Logical ports. */
Expand All @@ -109,7 +113,9 @@ struct odp_stats {
#define ODPL_MISS (1 << _ODPL_MISS_NR)
#define _ODPL_ACTION_NR 1 /* Packet output to ODPP_CONTROLLER. */
#define ODPL_ACTION (1 << _ODPL_ACTION_NR)
#define ODPL_ALL (ODPL_MISS | ODPL_ACTION)
#define _ODPL_SFLOW_NR 2 /* sFlow samples. */
#define ODPL_SFLOW (1 << _ODPL_SFLOW_NR)
#define ODPL_ALL (ODPL_MISS | ODPL_ACTION | ODPL_SFLOW)

/* Format of messages read from datapath fd. */
struct odp_msg {
Expand All @@ -118,7 +124,23 @@ struct odp_msg {
__u16 port; /* Port on which frame was received. */
__u16 reserved;
__u32 arg; /* Argument value specified in action. */
/* Followed by packet data. */

/*
* Followed by:
*
* ODPL_MISS, ODPL_ACTION: packet data.
*
* ODPL_SFLOW: "struct odp_sflow_sample_header", followed by
* an array of "union odp_action"s, followed by packet data.
*/
};

/* Header added to sFlow sampled packet.
 *
 * In an ODPL_SFLOW message this header comes right after "struct odp_msg"
 * and is followed by 'n_actions' "union odp_action"s and then the packet
 * data.  The datapath fills in 'sample_pool' with the sum of its per-CPU
 * sflow_pool counters at the time the sample is taken. */
struct odp_sflow_sample_header {
__u64 sample_pool; /* Number of potentially sampled packets. */
__u32 n_actions; /* Number of following "union odp_action"s. */
__u32 reserved; /* Pad up to 64-bit boundary. */
/* Followed by n_actions "union odp_action"s. */
};

#define ODP_PORT_INTERNAL (1 << 0) /* This port is simulated. */
Expand Down
18 changes: 17 additions & 1 deletion lib/dpif-linux.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2009 Nicira Networks.
* Copyright (c) 2008, 2009, 2010 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -196,6 +196,7 @@ dpif_linux_delete(struct dpif *dpif_)
/* Fetches datapath statistics for 'dpif_' into '*stats' using the
 * ODP_DP_STATS ioctl and returns whatever do_ioctl() returns.
 *
 * '*stats' is cleared before the call.
 * NOTE(review): presumably this is so that members the kernel module does
 * not fill in (e.g. a module without max_sflow_queue) read as zero. */
static int
dpif_linux_get_stats(const struct dpif *dpif_, struct odp_stats *stats)
{
    int retval;

    memset(stats, 0, sizeof(struct odp_stats));
    retval = do_ioctl(dpif_, ODP_DP_STATS, stats);
    return retval;
}

Expand Down Expand Up @@ -395,6 +396,19 @@ dpif_linux_recv_set_mask(struct dpif *dpif_, int listen_mask)
return do_ioctl(dpif_, ODP_SET_LISTEN_MASK, &listen_mask);
}

/* Reads the sFlow sampling probability of 'dpif_' into '*probability' by
 * issuing the ODP_GET_SFLOW_PROBABILITY ioctl.  Returns the result of
 * do_ioctl(). */
static int
dpif_linux_get_sflow_probability(const struct dpif *dpif_,
                                 uint32_t *probability)
{
    int retval = do_ioctl(dpif_, ODP_GET_SFLOW_PROBABILITY, probability);

    return retval;
}

/* Sets the sFlow sampling probability of 'dpif_' to 'probability' by issuing
 * the ODP_SET_SFLOW_PROBABILITY ioctl.  Returns the result of do_ioctl(). */
static int
dpif_linux_set_sflow_probability(struct dpif *dpif_, uint32_t probability)
{
    /* The ioctl takes a pointer to the value, so pass the address of the
     * by-value parameter. */
    uint32_t *arg = &probability;

    return do_ioctl(dpif_, ODP_SET_SFLOW_PROBABILITY, arg);
}

static int
dpif_linux_recv(struct dpif *dpif_, struct ofpbuf **bufp)
{
Expand Down Expand Up @@ -475,6 +489,8 @@ const struct dpif_class dpif_linux_class = {
dpif_linux_execute,
dpif_linux_recv_get_mask,
dpif_linux_recv_set_mask,
dpif_linux_get_sflow_probability,
dpif_linux_set_sflow_probability,
dpif_linux_recv,
dpif_linux_recv_wait,
};
Expand Down
2 changes: 2 additions & 0 deletions lib/dpif-netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1333,6 +1333,8 @@ const struct dpif_class dpif_netdev_class = {
dpif_netdev_execute,
dpif_netdev_recv_get_mask,
dpif_netdev_recv_set_mask,
NULL, /* get_sflow_probability */
NULL, /* set_sflow_probability */
dpif_netdev_recv,
dpif_netdev_recv_wait,
};
17 changes: 17 additions & 0 deletions lib/dpif-provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,23 @@ struct dpif_class {
* corresponding type when it calls the recv member function. */
int (*recv_set_mask)(struct dpif *dpif, int listen_mask);

/* Retrieves 'dpif''s sFlow sampling probability into '*probability'.
* Return value is 0 or a positive errno value. EOPNOTSUPP indicates that
* the datapath does not support sFlow, as does a null pointer.
*
* A probability of 0 means sample no packets, UINT32_MAX means sample
* every packet, and other values are intermediate probabilities. */
int (*get_sflow_probability)(const struct dpif *dpif,
uint32_t *probability);

/* Sets 'dpif''s sFlow sampling probability to 'probability'. Return value
* is 0 or a positive errno value. EOPNOTSUPP indicates that the datapath
* does not support sFlow, as does a null pointer.
*
* A probability of 0 means sample no packets, UINT32_MAX means sample
* every packet, and other values are intermediate probabilities. */
int (*set_sflow_probability)(struct dpif *dpif, uint32_t probability);

/* Attempts to receive a message from 'dpif'. If successful, stores the
* message into '*packetp'. The message, if one is received, must begin
* with 'struct odp_msg' as a header. Only messages of the types selected
Expand Down
41 changes: 39 additions & 2 deletions lib/dpif.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2009 Nicira Networks.
* Copyright (c) 2008, 2009, 2010 Nicira Networks.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -298,6 +298,7 @@ dpif_set_drop_frags(struct dpif *dpif, bool drop_frags)
return error;
}


/* Attempts to add 'devname' as a port on 'dpif', given the combination of
* ODP_PORT_* flags in 'flags'. If successful, returns 0 and sets '*port_nop'
* to the new port's port number (if 'port_nop' is non-null). On failure,
Expand Down Expand Up @@ -844,6 +845,41 @@ dpif_recv_set_mask(struct dpif *dpif, int listen_mask)
return error;
}

/* Stores 'dpif''s sFlow sampling probability in '*probability'.  The scale
 * runs from 0 (sample no packets) to UINT32_MAX (sample every packet), with
 * values in between denoting intermediate probabilities.
 *
 * Returns 0 on success, otherwise a positive errno value, in which case
 * '*probability' is set to 0.  EOPNOTSUPP means that 'dpif' does not support
 * sFlow sampling. */
int
dpif_get_sflow_probability(const struct dpif *dpif, uint32_t *probability)
{
    int retval;

    if (dpif->class->get_sflow_probability) {
        retval = dpif->class->get_sflow_probability(dpif, probability);
    } else {
        /* The provider supplies no hook for this operation. */
        retval = EOPNOTSUPP;
    }

    if (retval) {
        /* Give the caller a well-defined value even on failure. */
        *probability = 0;
    }

    log_operation(dpif, "get_sflow_probability", retval);
    return retval;
}

/* Changes 'dpif''s sFlow sampling probability to 'probability'.  The scale
 * runs from 0 (sample no packets) to UINT32_MAX (sample every packet), with
 * values in between denoting intermediate probabilities.
 *
 * Returns 0 on success, otherwise a positive errno value.  EOPNOTSUPP means
 * that 'dpif' does not support sFlow sampling. */
int
dpif_set_sflow_probability(struct dpif *dpif, uint32_t probability)
{
    /* Assume the provider has no hook until it proves otherwise. */
    int retval = EOPNOTSUPP;

    if (dpif->class->set_sflow_probability) {
        retval = dpif->class->set_sflow_probability(dpif, probability);
    }

    log_operation(dpif, "set_sflow_probability", retval);
    return retval;
}

/* Attempts to receive a message from 'dpif'. If successful, stores the
* message into '*packetp'. The message, if one is received, will begin with
* 'struct odp_msg' as a header. Only messages of the types selected with
Expand All @@ -867,6 +903,7 @@ dpif_recv(struct dpif *dpif, struct ofpbuf **packetp)
"%zu on port %"PRIu16": %s", dpif_name(dpif),
(msg->type == _ODPL_MISS_NR ? "miss"
: msg->type == _ODPL_ACTION_NR ? "action"
: msg->type == _ODPL_SFLOW_NR ? "sFlow"
: "<unknown>"),
payload_len, msg->port, s);
free(s);
Expand All @@ -893,7 +930,7 @@ dpif_recv_purge(struct dpif *dpif)
return error;
}

for (i = 0; i < stats.max_miss_queue + stats.max_action_queue; i++) {
for (i = 0; i < stats.max_miss_queue + stats.max_action_queue + stats.max_sflow_queue; i++) {
struct ofpbuf *buf;
error = dpif_recv(dpif, &buf);
if (error) {
Expand Down
2 changes: 2 additions & 0 deletions lib/dpif.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ int dpif_execute(struct dpif *, uint16_t in_port,

int dpif_recv_get_mask(const struct dpif *, int *listen_mask);
int dpif_recv_set_mask(struct dpif *, int listen_mask);
int dpif_get_sflow_probability(const struct dpif *, uint32_t *probability);
int dpif_set_sflow_probability(struct dpif *, uint32_t probability);
int dpif_recv(struct dpif *, struct ofpbuf **);
int dpif_recv_purge(struct dpif *);
void dpif_recv_wait(struct dpif *);
Expand Down
1 change: 1 addition & 0 deletions lib/vlog-modules.def
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ VLOG_MODULE(proc_net_compat)
VLOG_MODULE(process)
VLOG_MODULE(rconn)
VLOG_MODULE(rtnetlink)
VLOG_MODULE(sflow)
VLOG_MODULE(stp)
VLOG_MODULE(stats)
VLOG_MODULE(status)
Expand Down
Loading

0 comments on commit 72b0630

Please sign in to comment.