Skip to content

Commit

Permalink
sched: fix process time monotonicity
Browse files Browse the repository at this point in the history
Spencer reported a problem where utime and stime were going negative despite
the fixes in commit b27f03d. The suspected
reason for the problem is that signal_struct maintains its own utime and
stime (of exited tasks). These are not updated using the new task_utime()
routine, hence sig->utime can go backwards and cause the same problem
to occur (sig->utime adds tsk->utime and not task_utime()). This patch
fixes the problem.

TODO: using max(task->prev_utime, derived utime) works for now, but a more
generic solution is to implement cputime_max() and use the cputime_gt()
function for comparison.

Reported-by: [email protected]
Signed-off-by: Balbir Singh <[email protected]>
Signed-off-by: Peter Zijlstra <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
Balbir Singh authored and Ingo Molnar committed Sep 5, 2008
1 parent 56c7426 commit 4904862
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 62 deletions.
59 changes: 0 additions & 59 deletions fs/proc/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
return 0;
}

/*
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* task_utime - user CPU time of @p (CONFIG_VIRT_CPU_ACCOUNTING variant).
*
* Returns p->utime as stored; no scaling or clamping is applied.
*/
static cputime_t task_utime(struct task_struct *p)
{
return p->utime;
}

/*
* task_stime - system CPU time of @p (CONFIG_VIRT_CPU_ACCOUNTING variant).
*
* Returns p->stime as stored; no scaling or clamping is applied.
*/
static cputime_t task_stime(struct task_struct *p)
{
return p->stime;
}
#else
/*
* task_utime - user CPU time of @p, derived from p->se.sum_exec_runtime.
*
* The runtime is split in the proportion utime / (utime + stime) taken from
* p->utime and p->stime. The value returned is the larger of that result and
* p->prev_utime, so successive calls never report a smaller value.
*
* Side effect: stores the returned value in p->prev_utime.
*/
static cputime_t task_utime(struct task_struct *p)
{
clock_t utime = cputime_to_clock_t(p->utime),
total = utime + cputime_to_clock_t(p->stime);
u64 temp;

/*
* Use CFS's precise accounting:
* start from the total runtime, converted with nsec_to_clock_t().
*/
temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);

/*
* Scale by utime / total. When total is zero the division is skipped
* and the whole runtime is treated as user time.
*/
if (total) {
temp *= utime;
/* do_div() leaves the quotient in temp */
do_div(temp, total);
}
utime = (clock_t)temp;

/* Never go below the value handed out previously. */
p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
return p->prev_utime;
}

/*
* task_stime - system CPU time of @p, derived from p->se.sum_exec_runtime.
*
* Computed as the total runtime minus task_utime(p). The value returned is
* p->prev_stime, which is only ever raised here, never lowered.
*
* Side effects: may update p->prev_stime; the task_utime() call may also
* update p->prev_utime.
*/
static cputime_t task_stime(struct task_struct *p)
{
clock_t stime;

/*
* Use CFS's precise accounting. (we subtract utime from
* the total, to make sure the total observed by userspace
* grows monotonically - apps rely on that):
*/
stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
cputime_to_clock_t(task_utime(p));

/*
* A negative difference is ignored; p->prev_stime then keeps its
* earlier value.
*/
if (stime >= 0)
p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));

return p->prev_stime;
}
#endif

/*
* task_gtime - returns p->gtime as stored, with no adjustment.
*/
static cputime_t task_gtime(struct task_struct *p)
{
return p->gtime;
}

static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task, int whole)
{
Expand Down
4 changes: 4 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -1475,6 +1475,10 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}

/*
* Per-task CPU time accessors. Each takes the task to query and returns a
* cputime_t value.
*/
extern cputime_t task_utime(struct task_struct *p);
extern cputime_t task_stime(struct task_struct *p);
extern cputime_t task_gtime(struct task_struct *p);

/*
* Per process flags
*/
Expand Down
6 changes: 3 additions & 3 deletions kernel/exit.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
sig->utime = cputime_add(sig->utime, tsk->utime);
sig->stime = cputime_add(sig->stime, tsk->stime);
sig->gtime = cputime_add(sig->gtime, tsk->gtime);
sig->utime = cputime_add(sig->utime, task_utime(tsk));
sig->stime = cputime_add(sig->stime, task_stime(tsk));
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
Expand Down
59 changes: 59 additions & 0 deletions kernel/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -4178,6 +4178,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
cpustat->steal = cputime64_add(cpustat->steal, tmp);
}

/*
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
/*
* task_utime - user CPU time of @p (CONFIG_VIRT_CPU_ACCOUNTING variant).
*
* Returns p->utime as stored; no scaling or clamping is applied.
*/
cputime_t task_utime(struct task_struct *p)
{
return p->utime;
}

/*
* task_stime - system CPU time of @p (CONFIG_VIRT_CPU_ACCOUNTING variant).
*
* Returns p->stime as stored; no scaling or clamping is applied.
*/
cputime_t task_stime(struct task_struct *p)
{
return p->stime;
}
#else
/*
* task_utime - user CPU time of @p, derived from p->se.sum_exec_runtime.
*
* The runtime is split in the proportion utime / (utime + stime) taken from
* p->utime and p->stime. The value returned is the larger of that result and
* p->prev_utime, so successive calls never report a smaller value.
*
* Side effect: stores the returned value in p->prev_utime.
*/
cputime_t task_utime(struct task_struct *p)
{
clock_t utime = cputime_to_clock_t(p->utime),
total = utime + cputime_to_clock_t(p->stime);
u64 temp;

/*
* Use CFS's precise accounting:
* start from the total runtime, converted with nsec_to_clock_t().
*/
temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);

/*
* Scale by utime / total. When total is zero the division is skipped
* and the whole runtime is treated as user time.
*/
if (total) {
temp *= utime;
/* do_div() leaves the quotient in temp */
do_div(temp, total);
}
utime = (clock_t)temp;

/* Never go below the value handed out previously. */
p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
return p->prev_utime;
}

/*
* task_stime - system CPU time of @p, derived from p->se.sum_exec_runtime.
*
* Computed as the total runtime minus task_utime(p). The value returned is
* p->prev_stime, which is only ever raised here, never lowered.
*
* Side effects: may update p->prev_stime; the task_utime() call may also
* update p->prev_utime.
*/
cputime_t task_stime(struct task_struct *p)
{
clock_t stime;

/*
* Use CFS's precise accounting. (we subtract utime from
* the total, to make sure the total observed by userspace
* grows monotonically - apps rely on that):
*/
stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
cputime_to_clock_t(task_utime(p));

/*
* A negative difference is ignored; p->prev_stime then keeps its
* earlier value.
*/
if (stime >= 0)
p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));

return p->prev_stime;
}
#endif

/*
* task_gtime - returns p->gtime as stored, with no adjustment.
*/
inline cputime_t task_gtime(struct task_struct *p)
{
return p->gtime;
}

/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
Expand Down

0 comments on commit 4904862

Please sign in to comment.