Skip to content

Commit

Permalink
tracing/events: convert block trace points to TRACE_EVENT()
Browse files Browse the repository at this point in the history
TRACE_EVENT is a more generic way to define tracepoints. Doing so adds
these new capabilities to this tracepoint:

  - zero-copy and per-cpu splice() tracing
  - binary tracing without printf overhead
  - structured logging records exposed under /debug/tracing/events
  - trace events embedded in function tracer output and other plugins
  - user-defined, per tracepoint filter expressions
  ...

Cons:

  - no dev_t info for the output of plug, unplug_timer and unplug_io events.
    no dev_t info for getrq and sleeprq events if bio == NULL.
    no dev_t info for rq_abort,...,rq_requeue events if rq->rq_disk == NULL.

    This is mainly because we can't get the deivce from a request queue.
    But this may change in the future.

  - A packet command is converted to a string in TP_assign, not TP_print.
    While blktrace do the convertion just before output.

    Since pc requests should be rather rare, this is not a big issue.

  - In blktrace, an event can have 2 different print formats, but a TRACE_EVENT
    has a unique format, which means we have some unused data in a trace entry.

    The overhead is minimized by using __dynamic_array() instead of __array().

I've benchmarked the ioctl blktrace vs the splice based TRACE_EVENT tracing:

      dd                   dd + ioctl blktrace       dd + TRACE_EVENT (splice)
1     7.36s, 42.7 MB/s     7.50s, 42.0 MB/s          7.41s, 42.5 MB/s
2     7.43s, 42.3 MB/s     7.48s, 42.1 MB/s          7.43s, 42.4 MB/s
3     7.38s, 42.6 MB/s     7.45s, 42.2 MB/s          7.41s, 42.5 MB/s

So the overhead of tracing is very small, and no regression when using
those trace events vs blktrace.

And the binary output of TRACE_EVENT is much smaller than blktrace:

 # ls -l -h
 -rw-r--r-- 1 root root 8.8M 06-09 13:24 sda.blktrace.0
 -rw-r--r-- 1 root root 195K 06-09 13:24 sda.blktrace.1
 -rw-r--r-- 1 root root 2.7M 06-09 13:25 trace_splice.out

Following are some comparisons between TRACE_EVENT and blktrace:

plug:
  kjournald-480   [000]   303.084981: block_plug: [kjournald]
  kjournald-480   [000]   303.084981:   8,0    P   N [kjournald]

unplug_io:
  kblockd/0-118   [000]   300.052973: block_unplug_io: [kblockd/0] 1
  kblockd/0-118   [000]   300.052974:   8,0    U   N [kblockd/0] 1

remap:
  kjournald-480   [000]   303.085042: block_remap: 8,0 W 102736992 + 8 <- (8,8) 33384
  kjournald-480   [000]   303.085043:   8,0    A   W 102736992 + 8 <- (8,8) 33384

bio_backmerge:
  kjournald-480   [000]   303.085086: block_bio_backmerge: 8,0 W 102737032 + 8 [kjournald]
  kjournald-480   [000]   303.085086:   8,0    M   W 102737032 + 8 [kjournald]

getrq:
  kjournald-480   [000]   303.084974: block_getrq: 8,0 W 102736984 + 8 [kjournald]
  kjournald-480   [000]   303.084975:   8,0    G   W 102736984 + 8 [kjournald]

  bash-2066  [001]  1072.953770:   8,0    G   N [bash]
  bash-2066  [001]  1072.953773: block_getrq: 0,0 N 0 + 0 [bash]

rq_complete:
  konsole-2065  [001]   300.053184: block_rq_complete: 8,0 W () 103669040 + 16 [0]
  konsole-2065  [001]   300.053191:   8,0    C   W 103669040 + 16 [0]

  ksoftirqd/1-7   [001]  1072.953811:   8,0    C   N (5a 00 08 00 00 00 00 00 24 00) [0]
  ksoftirqd/1-7   [001]  1072.953813: block_rq_complete: 0,0 N (5a 00 08 00 00 00 00 00 24 00) 0 + 0 [0]

rq_insert:
  kjournald-480   [000]   303.084985: block_rq_insert: 8,0 W 0 () 102736984 + 8 [kjournald]
  kjournald-480   [000]   303.084986:   8,0    I   W 102736984 + 8 [kjournald]

Changelog from v2 -> v3:

- use the newly introduced __dynamic_array().

Changelog from v1 -> v2:

- use __string() instead of __array() to minimize the memory required
  to store hex dump of rq->cmd().

- support large pc requests.

- add missing blk_fill_rwbs_rq() in block_rq_requeue TRACE_EVENT.

- some cleanups.

Signed-off-by: Li Zefan <[email protected]>
LKML-Reference: <[email protected]>
Signed-off-by: Steven Rostedt <[email protected]>
  • Loading branch information
Li Zefan authored and rostedt committed Jun 9, 2009
1 parent f57a8a1 commit 5578213
Show file tree
Hide file tree
Showing 10 changed files with 588 additions and 104 deletions.
16 changes: 4 additions & 12 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,14 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
#include <trace/block.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>

#include "blk.h"

DEFINE_TRACE(block_plug);
DEFINE_TRACE(block_unplug_io);
DEFINE_TRACE(block_unplug_timer);
DEFINE_TRACE(block_getrq);
DEFINE_TRACE(block_sleeprq);
DEFINE_TRACE(block_rq_requeue);
DEFINE_TRACE(block_bio_backmerge);
DEFINE_TRACE(block_bio_frontmerge);
DEFINE_TRACE(block_bio_queue);
DEFINE_TRACE(block_rq_complete);
DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */
EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);

static int __make_request(struct request_queue *q, struct bio *bio);

Expand Down
8 changes: 2 additions & 6 deletions block/elevator.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,16 @@
#include <linux/compiler.h>
#include <linux/delay.h>
#include <linux/blktrace_api.h>
#include <trace/block.h>
#include <linux/hash.h>
#include <linux/uaccess.h>

#include <trace/events/block.h>

#include "blk.h"

static DEFINE_SPINLOCK(elv_list_lock);
static LIST_HEAD(elv_list);

DEFINE_TRACE(block_rq_abort);

/*
* Merge hash stuff.
*/
Expand All @@ -55,9 +54,6 @@ static const int elv_hash_shift = 6;
#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash))

DEFINE_TRACE(block_rq_insert);
DEFINE_TRACE(block_rq_issue);

/*
* Query io scheduler to see if the current process issuing bio may be
* merged with rq.
Expand Down
5 changes: 2 additions & 3 deletions drivers/md/dm.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/blktrace_api.h>
#include <trace/block.h>

#include <trace/events/block.h>

#define DM_MSG_PREFIX "core"

Expand Down Expand Up @@ -53,8 +54,6 @@ struct dm_target_io {
union map_info info;
};

DEFINE_TRACE(block_bio_complete);

/*
* For request-based dm.
* One of these is allocated per request.
Expand Down
3 changes: 1 addition & 2 deletions fs/bio.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/blktrace_api.h>
#include <trace/block.h>
#include <scsi/sg.h> /* for struct sg_iovec */

DEFINE_TRACE(block_split);
#include <trace/events/block.h>

/*
* Test patch to inline a certain number of bi_io_vec's inside the bio
Expand Down
13 changes: 13 additions & 0 deletions include/linux/blktrace_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,5 +218,18 @@ static inline int blk_trace_init_sysfs(struct device *dev)

#endif /* CONFIG_BLK_DEV_IO_TRACE */

#ifdef CONFIG_EVENT_TRACING

static inline int blk_cmd_buf_len(struct request *rq)
{
return blk_pc_request(rq) ? rq->cmd_len * 3 : 1;
}

extern void blk_dump_cmd(char *buf, struct request *rq);
extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);

#endif /* CONFIG_EVENT_TRACING */

#endif /* __KERNEL__ */
#endif
76 changes: 0 additions & 76 deletions include/trace/block.h

This file was deleted.

Loading

0 comments on commit 5578213

Please sign in to comment.