Skip to content

Commit

Permalink
dpif-netdev: Add dpif-netdev/pmd-stats-* appctl commands.
Browse files Browse the repository at this point in the history
These commands can be used to get packets and cycles counters on a pmd
thread basis.  They're useful to get a clearer picture about the
performance of the userspace datapath.

They export these pieces of information:

- A (per-thread) view of the cache hit rates. Hits in the exact match
  cache are reported separately from hits in the masked classifier.
- A rough cycles count. This makes it possible to estimate the load of OVS
  and the polling overhead.

Signed-off-by: Daniele Di Proietto <[email protected]>
Acked-by: Ethan Jackson <[email protected]>
  • Loading branch information
ddiproietto authored and ejj committed Apr 14, 2015
1 parent c8973eb commit 6553d06
Show file tree
Hide file tree
Showing 3 changed files with 210 additions and 1 deletion.
8 changes: 8 additions & 0 deletions INSTALL.DPDK.md
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,14 @@ Using the DPDK with ovs-vswitchd:
Note, core 0 is always reserved for non-pmd threads and should never be set
in the cpu mask.
To understand where most of the time is spent and whether the caches are
effective, these commands can be used:
```
ovs-appctl dpif-netdev/pmd-stats-clear #To reset statistics
ovs-appctl dpif-netdev/pmd-stats-show
```
DPDK Rings :
------------
Expand Down
185 changes: 184 additions & 1 deletion lib/dpif-netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,13 @@ struct dp_netdev_pmd_thread {
/* threads on same numa node. */
int core_id; /* CPU core id of this pmd thread. */
int numa_id; /* numa node id of this pmd thread. */

/* Only a pmd thread can write on its own 'cycles' and 'stats'.
* The main thread keeps 'stats_zero' and 'cycles_zero' as base
* values and subtracts them from 'stats' and 'cycles' before
* reporting to the user */
unsigned long long stats_zero[DP_N_STATS];
uint64_t cycles_zero[PMD_N_CYCLES];
};

#define PMD_INITIAL_SEQ 1
Expand Down Expand Up @@ -516,6 +523,182 @@ get_dp_netdev(const struct dpif *dpif)
{
return dpif_netdev_cast(dpif)->dp;
}

/* Selects the operation that dpif_netdev_pmd_info() applies to the packet
 * and cycle counters of each thread. */
enum pmd_info_type {
    /* Report the counters, relative to the last saved reference values. */
    PMD_INFO_SHOW_STATS,
    /* Save the current counters as the new reference ("zero") values. */
    PMD_INFO_CLEAR_STATS
};

/* Appends to 'reply' a human-readable report for 'pmd': a header line naming
 * the thread, its packet counters and, when any cycles were counted, its
 * cycle counters and per-packet averages.
 *
 * 'stats' and 'cycles' hold a snapshot of the thread's raw counters.  Both
 * arrays are modified in place: on return they contain the values relative
 * to the 'stats_zero' and 'cycles_zero' references stored in 'pmd'. */
static void
pmd_info_show_stats(struct ds *reply,
                    struct dp_netdev_pmd_thread *pmd,
                    unsigned long long stats[DP_N_STATS],
                    uint64_t cycles[PMD_N_CYCLES])
{
    unsigned long long n_packets = 0;
    uint64_t n_cycles = 0;
    const char *thread_kind;
    int idx;

    /* Rebase every counter on its '*_zero' reference.  Loads and stores are
     * relaxed, so a reference value may be more recent than the counter
     * value read for this report.  The numbers are not meant to be exact,
     * but the difference must never come out negative, hence the clamp. */
    for (idx = 0; idx < DP_N_STATS; idx++) {
        unsigned long long base = pmd->stats_zero[idx];

        stats[idx] = (stats[idx] > base) ? stats[idx] - base : 0;

        /* Lost packets are already included in DP_STAT_MISS. */
        if (idx != DP_STAT_LOST) {
            n_packets += stats[idx];
        }
    }

    for (idx = 0; idx < PMD_N_CYCLES; idx++) {
        uint64_t base = pmd->cycles_zero[idx];

        cycles[idx] = (cycles[idx] > base) ? cycles[idx] - base : 0;
        n_cycles += cycles[idx];
    }

    /* Header line: thread kind, then whichever ids are specified. */
    thread_kind = (pmd->core_id == NON_PMD_CORE_ID) ? "main thread"
                                                    : "pmd thread";
    ds_put_cstr(reply, thread_kind);
    if (pmd->numa_id != OVS_NUMA_UNSPEC) {
        ds_put_format(reply, " numa_id %d", pmd->numa_id);
    }
    if (pmd->core_id != OVS_CORE_UNSPEC) {
        ds_put_format(reply, " core_id %d", pmd->core_id);
    }
    ds_put_cstr(reply, ":\n");

    /* Packet counters are always reported. */
    ds_put_format(reply, "\temc hits:%llu\n", stats[DP_STAT_EXACT_HIT]);
    ds_put_format(reply, "\tmegaflow hits:%llu\n", stats[DP_STAT_MASKED_HIT]);
    ds_put_format(reply, "\tmiss:%llu\n", stats[DP_STAT_MISS]);
    ds_put_format(reply, "\tlost:%llu\n", stats[DP_STAT_LOST]);

    /* Without any counted cycle the percentages below are undefined. */
    if (n_cycles == 0) {
        return;
    }

    ds_put_format(reply, "\tpolling cycles:%"PRIu64" (%.02f%%)\n",
                  cycles[PMD_CYCLES_POLLING],
                  cycles[PMD_CYCLES_POLLING] / (double) n_cycles * 100);
    ds_put_format(reply, "\tprocessing cycles:%"PRIu64" (%.02f%%)\n",
                  cycles[PMD_CYCLES_PROCESSING],
                  cycles[PMD_CYCLES_PROCESSING] / (double) n_cycles * 100);

    /* Per-packet averages need at least one packet. */
    if (n_packets == 0) {
        return;
    }

    ds_put_format(reply,
                  "\tavg cycles per packet: %.02f (%"PRIu64"/%llu)\n",
                  n_cycles / (double) n_packets,
                  n_cycles, n_packets);
    ds_put_format(reply,
                  "\tavg processing cycles per packet: "
                  "%.02f (%"PRIu64"/%llu)\n",
                  cycles[PMD_CYCLES_PROCESSING] / (double) n_packets,
                  cycles[PMD_CYCLES_PROCESSING], n_packets);
}

/* Makes the counters of 'pmd' read as zero in later reports by recording the
 * snapshot passed in 'stats' and 'cycles' as the thread's new reference
 * values ('stats_zero' and 'cycles_zero').
 *
 * The live counters themselves are never written here: they are written by
 * other threads, and 'stats' is also used to count datapath stats, which
 * must not be cleared.  Later reports subtract the saved references instead.
 * 'reply' is unused. */
static void
pmd_info_clear_stats(struct ds *reply OVS_UNUSED,
                     struct dp_netdev_pmd_thread *pmd,
                     unsigned long long stats[DP_N_STATS],
                     uint64_t cycles[PMD_N_CYCLES])
{
    int idx;

    for (idx = 0; idx < DP_N_STATS; idx++) {
        pmd->stats_zero[idx] = stats[idx];
    }

    for (idx = 0; idx < PMD_N_CYCLES; idx++) {
        pmd->cycles_zero[idx] = cycles[idx];
    }
}

/* unixctl handler shared by the 'dpif-netdev/pmd-stats-show' and
 * 'dpif-netdev/pmd-stats-clear' commands.
 *
 * 'aux' must point to the 'enum pmd_info_type' that selects the operation.
 * The datapath is looked up by name from 'argv[1]' when 'argc' is 2;
 * otherwise it is chosen implicitly, but only if exactly one datapath
 * exists.  If no datapath can be selected, an error reply is sent on 'conn'.
 * Otherwise every thread in the datapath's 'poll_threads' is processed and
 * the accumulated text (empty for a clear) is sent as the reply. */
static void
dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
                     void *aux)
{
    struct ds reply = DS_EMPTY_INITIALIZER;
    struct dp_netdev_pmd_thread *pmd;
    struct dp_netdev *dp = NULL;
    enum pmd_info_type type = *(enum pmd_info_type *) aux;

    /* Held across both the datapath lookup and the thread iteration. */
    ovs_mutex_lock(&dp_netdev_mutex);

    if (argc == 2) {
        dp = shash_find_data(&dp_netdevs, argv[1]);
    } else if (shash_count(&dp_netdevs) == 1) {
        /* There's only one datapath. */
        dp = shash_first(&dp_netdevs)->data;
    }

    if (!dp) {
        ovs_mutex_unlock(&dp_netdev_mutex);
        unixctl_command_reply_error(conn,
                                    "please specify an existing datapath");
        return;
    }

    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
        unsigned long long stats[DP_N_STATS];
        uint64_t cycles[PMD_N_CYCLES];
        size_t i;   /* Unsigned, to match the type of ARRAY_SIZE(). */

        /* Take a local snapshot of the thread's current stats and cycle
         * counters; the helpers below only ever see this copy. */
        for (i = 0; i < ARRAY_SIZE(stats); i++) {
            atomic_read_relaxed(&pmd->stats.n[i], &stats[i]);
        }
        for (i = 0; i < ARRAY_SIZE(cycles); i++) {
            atomic_read_relaxed(&pmd->cycles.n[i], &cycles[i]);
        }

        if (type == PMD_INFO_CLEAR_STATS) {
            pmd_info_clear_stats(&reply, pmd, stats, cycles);
        } else if (type == PMD_INFO_SHOW_STATS) {
            pmd_info_show_stats(&reply, pmd, stats, cycles);
        }
    }

    ovs_mutex_unlock(&dp_netdev_mutex);

    unixctl_command_reply(conn, ds_cstr(&reply));
    ds_destroy(&reply);
}

/* Registers the 'dpif-netdev/pmd-stats-show' and
 * 'dpif-netdev/pmd-stats-clear' unixctl commands.  Each accepts an optional
 * datapath name and both are served by dpif_netdev_pmd_info(); the operation
 * is selected through the 'aux' pointer given at registration.
 *
 * Always returns 0. */
static int
dpif_netdev_init(void)
{
    /* Static storage: the addresses are passed as 'aux' and dereferenced by
     * the handler later, so the objects must outlive this call. */
    static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS;
    static enum pmd_info_type clear_aux = PMD_INFO_CLEAR_STATS;

    unixctl_command_register("dpif-netdev/pmd-stats-show", "[dp]", 0, 1,
                             dpif_netdev_pmd_info, &show_aux);
    unixctl_command_register("dpif-netdev/pmd-stats-clear", "[dp]", 0, 1,
                             dpif_netdev_pmd_info, &clear_aux);

    return 0;
}

static int
dpif_netdev_enumerate(struct sset *all_dps,
Expand Down Expand Up @@ -3353,7 +3536,7 @@ dp_netdev_execute_actions(struct dp_netdev_pmd_thread *pmd,

const struct dpif_class dpif_netdev_class = {
"netdev",
NULL, /* init */
dpif_netdev_init,
dpif_netdev_enumerate,
dpif_netdev_port_open_type,
dpif_netdev_open,
Expand Down
18 changes: 18 additions & 0 deletions vswitchd/ovs-vswitchd.8.in
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,24 @@ type).
..
.so lib/dpctl.man
.
.SS "DPIF-NETDEV COMMANDS"
These commands are used to expose internal information (mostly statistics)
about the ``dpif-netdev'' userspace datapath. If there is only one datapath
(as is often the case, unless \fBdpctl/\fR commands are used), the \fIdp\fR
argument can be omitted.
.IP "\fBdpif-netdev/pmd-stats-show\fR [\fIdp\fR]"
Shows performance statistics for each pmd thread of the datapath \fIdp\fR.
The special thread ``main'' sums up the statistics of every non pmd thread.
The sum of ``emc hits'', ``megaflow hits'' and ``miss'' is the number of
packets received by the datapath. Cycles are counted using the TSC or similar
facilities (when available on the platform). To reset these counters use
\fBdpif-netdev/pmd-stats-clear\fR. The duration of one cycle depends on the
measuring infrastructure.
.IP "\fBdpif-netdev/pmd-stats-clear\fR [\fIdp\fR]"
Resets to zero the per pmd thread performance numbers shown by the
\fBdpif-netdev/pmd-stats-show\fR command. It will NOT reset datapath or
bridge statistics, only the values shown by the above command.
.
.so ofproto/ofproto-dpif-unixctl.man
.so ofproto/ofproto-unixctl.man
.so lib/vlog-unixctl.man
Expand Down

0 comments on commit 6553d06

Please sign in to comment.