Skip to content

Commit

Permalink
powerpc: Account time using timebase rather than PURR
Browse files Browse the repository at this point in the history
Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the
PURR register for measuring the user and system time used by
processes, as well as other related times such as hardirq and
softirq times.  This turns out to be quite confusing for users
because it means that a program will often be measured as taking
less time when run on a multi-threaded processor (SMT2 or SMT4 mode)
than it does when run on a single-threaded processor (ST mode), even
though the program takes longer to finish.  The discrepancy is
accounted for as stolen time, which is also confusing, particularly
when there are no other partitions running.

This changes the accounting to use the timebase instead, meaning that
the reported user and system times are the actual number of real-time
seconds that the program was executing on the processor thread,
regardless of which SMT mode the processor is in.  Thus a program will
generally show greater user and system times when run on a
multi-threaded processor than on a single-threaded processor.

On pSeries systems on POWER5 or later processors, we measure the
stolen time (time when this partition wasn't running) using the
hypervisor dispatch trace log.  We check for new entries in the
log on every entry from user mode and on every transition from
kernel process context to soft or hard IRQ context (i.e. when
account_system_vtime() gets called).  So that we can correctly
distinguish time stolen from user time and time stolen from system
time, without having to check the log on every exit to user mode,
we store separate timestamps for exit to user mode and entry from
user mode.

On systems that have a SPURR (POWER6 and POWER7), we read the SPURR
in account_system_vtime() (as before), and then apportion the SPURR
ticks since the last time we read it between scaled user time and
scaled system time according to the relative proportions of user
time and system time over the same interval.  This avoids having to
read the SPURR on every kernel entry and exit.  On systems that have
PURR but not SPURR (i.e., POWER5), we do the same using the PURR
rather than the SPURR.

This disables the DTL user interface in /sys/debug/kernel/powerpc/dtl
for now since it conflicts with the use of the dispatch trace log
by the time accounting code.

Signed-off-by: Paul Mackerras <[email protected]>
Signed-off-by: Benjamin Herrenschmidt <[email protected]>
  • Loading branch information
paulusmack authored and ozbenh committed Sep 2, 2010
1 parent 93c2270 commit cf9efce
Show file tree
Hide file tree
Showing 13 changed files with 290 additions and 194 deletions.
3 changes: 2 additions & 1 deletion arch/powerpc/include/asm/exception-64s.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,8 @@
li r10,0; \
ld r11,exception_marker@toc(r2); \
std r10,RESULT(r1); /* clear regs->result */ \
std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
ACCOUNT_STOLEN_TIME

/*
* Exception vectors.
Expand Down
19 changes: 19 additions & 0 deletions arch/powerpc/include/asm/lppaca.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,25 @@ struct slb_shadow {

extern struct slb_shadow slb_shadow[];

/*
* Layout of entries in the hypervisor's dispatch trace log buffer.
*/
struct dtl_entry {
u8 dispatch_reason;
u8 preempt_reason;
u16 processor_id;
u32 enqueue_to_dispatch_time;
u32 ready_to_enqueue_time;
u32 waiting_to_ready_time;
u64 timebase;
u64 fault_addr;
u64 srr0;
u64 srr1;
};

#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))

#endif /* CONFIG_PPC_BOOK3S */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_LPPACA_H */
10 changes: 9 additions & 1 deletion arch/powerpc/include/asm/paca.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ struct paca_struct {
u8 kexec_state; /* set when kexec down has irqs off */
#ifdef CONFIG_PPC_STD_MMU_64
struct slb_shadow *slb_shadow_ptr;
struct dtl_entry *dispatch_log;
struct dtl_entry *dispatch_log_end;

/*
* Now, starting in cacheline 2, the exception save areas
Expand Down Expand Up @@ -134,8 +136,14 @@ struct paca_struct {
/* Stuff for accurate time accounting */
u64 user_time; /* accumulated usermode TB ticks */
u64 system_time; /* accumulated system TB ticks */
u64 startpurr; /* PURR/TB value snapshot */
u64 user_time_scaled; /* accumulated usermode SPURR ticks */
u64 starttime; /* TB value snapshot */
u64 starttime_user; /* TB value on exit to usermode */
u64 startspurr; /* SPURR value snapshot */
u64 utime_sspurr; /* ->user_time when ->startspurr set */
u64 stolen_time; /* TB ticks taken by hypervisor */
u64 dtl_ridx; /* read index in dispatch log */
struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */

#ifdef CONFIG_KVM_BOOK3S_HANDLER
/* We use this to store guest state in */
Expand Down
50 changes: 31 additions & 19 deletions arch/powerpc/include/asm/ppc_asm.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <asm/asm-compat.h>
#include <asm/processor.h>
#include <asm/ppc-opcode.h>
#include <asm/firmware.h>

#ifndef __ASSEMBLY__
#error __FILE__ should only be used in assembler files
Expand All @@ -26,37 +27,48 @@
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
#define ACCOUNT_CPU_USER_ENTRY(ra, rb)
#define ACCOUNT_CPU_USER_EXIT(ra, rb)
#define ACCOUNT_STOLEN_TIME
#else
#define ACCOUNT_CPU_USER_ENTRY(ra, rb) \
beq 2f; /* if from kernel mode */ \
BEGIN_FTR_SECTION; \
mfspr ra,SPRN_PURR; /* get processor util. reg */ \
END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
BEGIN_FTR_SECTION; \
MFTB(ra); /* or get TB if no PURR */ \
END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \
ld rb,PACA_STARTPURR(r13); \
std ra,PACA_STARTPURR(r13); \
MFTB(ra); /* get timebase */ \
ld rb,PACA_STARTTIME_USER(r13); \
std ra,PACA_STARTTIME(r13); \
subf rb,rb,ra; /* subtract start value */ \
ld ra,PACA_USER_TIME(r13); \
add ra,ra,rb; /* add on to user time */ \
std ra,PACA_USER_TIME(r13); \
2:

#define ACCOUNT_CPU_USER_EXIT(ra, rb) \
BEGIN_FTR_SECTION; \
mfspr ra,SPRN_PURR; /* get processor util. reg */ \
END_FTR_SECTION_IFSET(CPU_FTR_PURR); \
BEGIN_FTR_SECTION; \
MFTB(ra); /* or get TB if no PURR */ \
END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \
ld rb,PACA_STARTPURR(r13); \
std ra,PACA_STARTPURR(r13); \
MFTB(ra); /* get timebase */ \
ld rb,PACA_STARTTIME(r13); \
std ra,PACA_STARTTIME_USER(r13); \
subf rb,rb,ra; /* subtract start value */ \
ld ra,PACA_SYSTEM_TIME(r13); \
add ra,ra,rb; /* add on to user time */ \
std ra,PACA_SYSTEM_TIME(r13);
#endif
add ra,ra,rb; /* add on to system time */ \
std ra,PACA_SYSTEM_TIME(r13)

#ifdef CONFIG_PPC_SPLPAR
#define ACCOUNT_STOLEN_TIME \
BEGIN_FW_FTR_SECTION; \
beq 33f; \
/* from user - see if there are any DTL entries to process */ \
ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \
ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \
ld r10,LPPACA_DTLIDX(r10); /* get log write index */ \
cmpd cr1,r11,r10; \
beq+ cr1,33f; \
bl .accumulate_stolen_time; \
33: \
END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)

#else /* CONFIG_PPC_SPLPAR */
#define ACCOUNT_STOLEN_TIME

#endif /* CONFIG_PPC_SPLPAR */

#endif /* CONFIG_VIRT_CPU_ACCOUNTING */

/*
* Macros for storing registers into and loading registers from
Expand Down
5 changes: 0 additions & 5 deletions arch/powerpc/include/asm/time.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ extern void to_tm(int tim, struct rtc_time * tm);
extern void GregorianDay(struct rtc_time *tm);

extern void generic_calibrate_decr(void);
extern void snapshot_timebase(void);

extern void set_dec_cpu6(unsigned int val);

Expand Down Expand Up @@ -212,12 +211,8 @@ struct cpu_usage {
DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);

#if defined(CONFIG_VIRT_CPU_ACCOUNTING)
extern void calculate_steal_time(void);
extern void snapshot_timebases(void);
#define account_process_vtime(tsk) account_process_tick(tsk, 0)
#else
#define calculate_steal_time() do { } while (0)
#define snapshot_timebases() do { } while (0)
#define account_process_vtime(tsk) do { } while (0)
#endif

Expand Down
8 changes: 5 additions & 3 deletions arch/powerpc/kernel/asm-offsets.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,17 +181,19 @@ int main(void)
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
DEFINE(SLBSHADOW_STACKESID,
offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0));
DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1));
DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int));
DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int));
DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
#endif /* CONFIG_PPC_STD_MMU_64 */
DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr));
DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr));
DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
Expand Down
18 changes: 18 additions & 0 deletions arch/powerpc/kernel/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,24 @@ system_call_common:
addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
std r11,-16(r9) /* "regshere" marker */
#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
beq 33f
/* if from user, see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
ld r10,LPPACA_DTLIDX(r10) /* get log write index */
cmpd cr1,r11,r10
beq+ cr1,33f
bl .accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
REST_2GPRS(7,r1)
addi r9,r1,STACK_FRAME_OVERHEAD
33:
END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */

#ifdef CONFIG_TRACE_IRQFLAGS
bl .trace_hardirqs_on
REST_GPR(0,r1)
Expand Down
1 change: 0 additions & 1 deletion arch/powerpc/kernel/process.c
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,6 @@ struct task_struct *__switch_to(struct task_struct *prev,

account_system_vtime(current);
account_process_vtime(current);
calculate_steal_time();

/*
* We can't take a PMU exception inside _switch() since there is a
Expand Down
5 changes: 0 additions & 5 deletions arch/powerpc/kernel/smp.c
Original file line number Diff line number Diff line change
Expand Up @@ -508,9 +508,6 @@ int __devinit start_secondary(void *unused)
if (smp_ops->take_timebase)
smp_ops->take_timebase();

if (system_state > SYSTEM_BOOTING)
snapshot_timebase();

secondary_cpu_time_init();

ipi_call_lock();
Expand Down Expand Up @@ -575,8 +572,6 @@ void __init smp_cpus_done(unsigned int max_cpus)

free_cpumask_var(old_mask);

snapshot_timebases();

dump_numa_cpu_topology();
}

Expand Down
Loading

0 comments on commit cf9efce

Please sign in to comment.