Skip to content

Commit

Permalink
[PATCH] per-task-delay-accounting: delay accounting usage of taskstat…
Browse files Browse the repository at this point in the history
…s interface

Usage of taskstats interface by delay accounting.

Signed-off-by: Shailabh Nagar <[email protected]>
Signed-off-by: Balbir Singh <[email protected]>
Cc: Jes Sorensen <[email protected]>
Cc: Peter Chubb <[email protected]>
Cc: Erich Focht <[email protected]>
Cc: Levent Serinol <[email protected]>
Cc: Jay Lan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Shailabh Nagar authored and Linus Torvalds committed Jul 15, 2006
1 parent c757249 commit 6f44993
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 7 deletions.
15 changes: 15 additions & 0 deletions include/linux/delayacct.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#define _LINUX_DELAYACCT_H

#include <linux/sched.h>
#include <linux/taskstats_kern.h>

/*
* Per-task flags relevant to delay accounting
Expand All @@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct task_struct *);
extern void __delayacct_tsk_exit(struct task_struct *);
extern void __delayacct_blkio_start(void);
extern void __delayacct_blkio_end(void);
extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);

static inline void delayacct_set_flag(int flag)
{
Expand Down Expand Up @@ -74,6 +76,16 @@ static inline void delayacct_blkio_end(void)
__delayacct_blkio_end();
}

/*
 * delayacct_add_tsk - add @tsk's delay accounting data to @d
 *
 * Returns -EINVAL when delayacct_on is clear, 0 without touching @d when
 * @tsk has no delays structure attached, and otherwise whatever
 * __delayacct_add_tsk() returns.
 */
static inline int delayacct_add_tsk(struct taskstats *d,
					struct task_struct *tsk)
{
	/* Annotated as the expected case: accounting is switched off */
	if (likely(!delayacct_on))
		return -EINVAL;

	return tsk->delays ? __delayacct_add_tsk(d, tsk) : 0;
}

#else
/*
 * No-op variant of delayacct_set_flag() from the #else branch of the
 * CONFIG_TASK_DELAY_ACCT conditional: @flag is ignored.
 */
static inline void delayacct_set_flag(int flag)
{}
Expand All @@ -89,6 +101,9 @@ static inline void delayacct_blkio_start(void)
{}
/*
 * No-op variant of delayacct_blkio_end() from the #else branch of the
 * CONFIG_TASK_DELAY_ACCT conditional.
 */
static inline void delayacct_blkio_end(void)
{}
/*
 * Variant of delayacct_add_tsk() from the #else branch of the
 * CONFIG_TASK_DELAY_ACCT conditional: neither @d nor @tsk is touched
 * and 0 is returned.
 */
static inline int delayacct_add_tsk(struct taskstats *d,
struct task_struct *tsk)
{ return 0; }
#endif /* CONFIG_TASK_DELAY_ACCT */

#endif
1 change: 1 addition & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,7 @@ struct task_struct {
*/
struct pipe_inode_info *splice_pipe;
#ifdef CONFIG_TASK_DELAY_ACCT
spinlock_t delays_lock;
struct task_delay_info *delays;
#endif
};
Expand Down
55 changes: 54 additions & 1 deletion include/linux/taskstats.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,60 @@
/*
 * struct taskstats - statistics record handed out by the taskstats interface
 *
 * The record starts with a 16-bit version followed by three 16-bit padding
 * words, so the 64-bit counters that follow begin at byte offset 8.
 * The delay accounting fields are accumulated by __delayacct_add_tsk().
 */
struct taskstats {

	/* Version 1 */
	__u16	version;
	__u16	padding[3];	/* Userspace should not interpret the padding
				 * field which can be replaced by useful
				 * fields if struct taskstats is extended.
				 */

	/* Delay accounting fields start
	 *
	 * All values, until comment "Delay accounting fields end" are
	 * available only if delay accounting is enabled, even though the last
	 * few fields are not delays
	 *
	 * xxx_count is the number of delay values recorded
	 * xxx_delay_total is the corresponding cumulative delay in nanoseconds
	 *
	 * xxx_delay_total wraps around to zero on overflow
	 * xxx_count incremented regardless of overflow
	 */

	/* Delay waiting for cpu, while runnable
	 * count, delay_total NOT updated atomically
	 */
	__u64	cpu_count;
	__u64	cpu_delay_total;

	/* Following four fields atomically updated using task->delays->lock */

	/* Delay waiting for synchronous block I/O to complete
	 * does not account for delays in I/O submission
	 */
	__u64	blkio_count;
	__u64	blkio_delay_total;

	/* Delay waiting for page fault I/O (swap in only) */
	__u64	swapin_count;
	__u64	swapin_delay_total;

	/* cpu "wall-clock" running time
	 * On some architectures, value will adjust for cpu time stolen
	 * from the kernel in involuntary waits due to virtualization.
	 * Value is cumulative, in nanoseconds, without a corresponding count
	 * and wraps around to zero silently on overflow
	 */
	__u64	cpu_run_real_total;

	/* cpu "virtual" running time
	 * Uses time intervals seen by the kernel i.e. no adjustment
	 * for kernel's involuntary waits due to virtualization.
	 * Value is cumulative, in nanoseconds, without a corresponding count
	 * and wraps around to zero silently on overflow
	 */
	__u64	cpu_run_virtual_total;
	/* Delay accounting fields end */
	/* version 1 ends here */
};


Expand Down
1 change: 1 addition & 0 deletions include/linux/taskstats_kern.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ enum {

#ifdef CONFIG_TASKSTATS
extern kmem_cache_t *taskstats_cache;
extern struct mutex taskstats_exit_mutex;

static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
struct taskstats **ptgidstats)
Expand Down
1 change: 1 addition & 0 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ config TASKSTATS

config TASK_DELAY_ACCT
bool "Enable per-task delay accounting (EXPERIMENTAL)"
depends on TASKSTATS
help
Collect information on time spent by a task waiting for system
resources like cpu, synchronous block I/O completion and swapping
Expand Down
62 changes: 61 additions & 1 deletion kernel/delayacct.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,22 @@ void delayacct_init(void)

/*
 * Prepare delay accounting state for @tsk: initialise tsk->delays_lock,
 * allocate a zeroed delay structure from delayacct_cache and, when the
 * allocation succeeds, initialise the lock embedded in it. On allocation
 * failure tsk->delays is left NULL.
 */
void __delayacct_tsk_init(struct task_struct *tsk)
{
	spin_lock_init(&tsk->delays_lock);

	/*
	 * tsk->delays_lock is not taken around the allocation; that would
	 * only be needed if this were called after tsk is attached to the
	 * tasklist.
	 */
	tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
	if (!tsk->delays)
		return;

	spin_lock_init(&tsk->delays->lock);
}

/*
 * Tear down the delay accounting state of @tsk.
 *
 * The pointer is cleared while holding tsk->delays_lock, the same lock
 * under which __delayacct_add_tsk() tests and dereferences tsk->delays,
 * so a concurrent reader either sees the structure while it is still
 * valid or sees NULL. The structure is freed exactly once, after the
 * lock has been dropped.
 */
void __delayacct_tsk_exit(struct task_struct *tsk)
{
	struct task_delay_info *delays = tsk->delays;

	spin_lock(&tsk->delays_lock);
	tsk->delays = NULL;
	spin_unlock(&tsk->delays_lock);

	/* Free through the saved copy; tsk->delays is already NULL */
	kmem_cache_free(delayacct_cache, delays);
}

/*
Expand Down Expand Up @@ -104,3 +111,56 @@ void __delayacct_blkio_end(void)
&current->delays->blkio_delay,
&current->delays->blkio_count);
}

/*
 * __delayacct_add_tsk - accumulate the delay statistics of @tsk into @d
 *
 * tsk->delays_lock is held for the whole update. If tsk->delays is NULL
 * nothing is added. Each xxx_total in @d is reset to 0 when the new sum
 * compares lower than the total it was derived from; the counts are
 * added without any such check. Always returns 0.
 */
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
{
	struct timespec ts;
	unsigned long pcnt, run_delay, cpu_time;
	s64 sum;

	spin_lock(&tsk->delays_lock);

	/*
	 * tsk->delays is only dereferenced further down, but leaving here
	 * avoids handing back the other per-task data needlessly.
	 */
	if (!tsk->delays)
		goto unlock;

	cputime_to_timespec(tsk->utime + tsk->stime, &ts);
	sum = (s64)d->cpu_run_real_total + timespec_to_ns(&ts);
	if (sum < (s64)d->cpu_run_real_total)
		d->cpu_run_real_total = 0;
	else
		d->cpu_run_real_total = sum;

	/*
	 * sched_info has no lock of its own (adding one would be too
	 * expensive), so work from a local snapshot of its values.
	 */
	pcnt = tsk->sched_info.pcnt;
	run_delay = tsk->sched_info.run_delay;
	cpu_time = tsk->sched_info.cpu_time;

	d->cpu_count += pcnt;

	jiffies_to_timespec(run_delay, &ts);
	sum = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
	if (sum < (s64)d->cpu_delay_total)
		d->cpu_delay_total = 0;
	else
		d->cpu_delay_total = sum;

	sum = (s64)d->cpu_run_virtual_total +
		(s64)jiffies_to_usecs(cpu_time) * 1000;
	if (sum < (s64)d->cpu_run_virtual_total)
		d->cpu_run_virtual_total = 0;
	else
		d->cpu_run_virtual_total = sum;

	/* A zero XXX_total with a non-zero XXX_count means XXX overflowed */

	spin_lock(&tsk->delays->lock);

	sum = d->blkio_delay_total + tsk->delays->blkio_delay;
	if (sum < d->blkio_delay_total)
		d->blkio_delay_total = 0;
	else
		d->blkio_delay_total = sum;

	sum = d->swapin_delay_total + tsk->delays->swapin_delay;
	if (sum < d->swapin_delay_total)
		d->swapin_delay_total = 0;
	else
		d->swapin_delay_total = sum;

	d->blkio_count += tsk->delays->blkio_count;
	d->swapin_count += tsk->delays->swapin_count;

	spin_unlock(&tsk->delays->lock);

unlock:
	spin_unlock(&tsk->delays_lock);
	return 0;
}
16 changes: 11 additions & 5 deletions kernel/taskstats.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@

#include <linux/kernel.h>
#include <linux/taskstats_kern.h>
#include <linux/delayacct.h>
#include <net/genetlink.h>
#include <asm/atomic.h>

static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
static int family_registered;
kmem_cache_t *taskstats_cache;
static DEFINE_MUTEX(taskstats_exit_mutex);

static struct genl_family family = {
.id = GENL_ID_GENERATE,
Expand Down Expand Up @@ -120,7 +120,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
* goto err;
*/

err:
rc = delayacct_add_tsk(stats, tsk);
stats->version = TASKSTATS_VERSION;

/* Define err: label here if needed */
put_task_struct(tsk);
return rc;

Expand Down Expand Up @@ -152,8 +155,14 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
* break;
*/

rc = delayacct_add_tsk(stats, tsk);
if (rc)
break;

} while_each_thread(first, tsk);
read_unlock(&tasklist_lock);
stats->version = TASKSTATS_VERSION;


/*
* Accounting subsystems can also add calls here if they don't
Expand Down Expand Up @@ -233,8 +242,6 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
if (!family_registered || !tidstats)
return;

mutex_lock(&taskstats_exit_mutex);

is_thread_group = !thread_group_empty(tsk);
rc = 0;

Expand Down Expand Up @@ -292,7 +299,6 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
err_skb:
nlmsg_free(rep_skb);
ret:
mutex_unlock(&taskstats_exit_mutex);
return;
}

Expand Down

0 comments on commit 6f44993

Please sign in to comment.