From 191e56880a6a638ce931859317f37deb084b6433 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Tue, 22 Mar 2011 16:34:13 -0700 Subject: [PATCH] calibrate: home in on correct lpj value more quickly Binary chop with a jiffy-resync on each step to find an upper bound is slow, so just race in a tight-ish loop to find an underestimate. If done with lots of individual steps, sometimes several hundreds of iterations would be required, which would impose a significant overhead, and make the initial estimate very low. By taking slowly increasing steps there will be less overhead. E.g. an x86_64 2.67GHz could have fitted in 613 individual small delays, but in reality should have been able to fit in a single delay 644 times longer, so underestimated by 31 steps. To reach the equivalent of 644 small delays with the accelerating scheme now requires about 130 iterations, so has <1/4th of the overhead, and can therefore be expected to underestimate by only 7 steps. As now we have a better initial estimate we can binary chop over a smaller range. With the loop overhead in the initial estimate kept low, and the step sizes moderate, we won't have under-estimated by much, so choose as tight a range as we can. Signed-off-by: Phil Carmody Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Tested-by: Stephen Boyd Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/calibrate.c | 57 +++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/init/calibrate.c b/init/calibrate.c index b71643a7acae8e..f9000dfbe2271e 100644 --- a/init/calibrate.c +++ b/init/calibrate.c @@ -110,8 +110,8 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} /* * This is the number of bits of precision for the loops_per_jiffy. Each - * bit takes on average 1.5/HZ seconds. 
This (like the original) is a little - * better than 1% + * time we refine our estimate after the first takes 1.5/HZ seconds, so try + * to start with a good estimate. * For the boot cpu we can skip the delay calibration and assign it a value * calculated based on the timer frequency. * For the rest of the CPUs we cannot assume that the timer frequency is same as @@ -121,38 +121,49 @@ static unsigned long __cpuinit calibrate_delay_direct(void) {return 0;} static unsigned long __cpuinit calibrate_delay_converge(void) { - unsigned long lpj, ticks, loopbit; - int lps_precision = LPS_PREC; + /* First stage - slowly accelerate to find initial bounds */ + unsigned long lpj, ticks, loopadd, chop_limit; + int trials = 0, band = 0, trial_in_band = 0; lpj = (1<<12); - while ((lpj <<= 1) != 0) { - /* wait for "start of" clock tick */ - ticks = jiffies; - while (ticks == jiffies) - /* nothing */; - /* Go .. */ - ticks = jiffies; - __delay(lpj); - ticks = jiffies - ticks; - if (ticks) - break; - } + + /* wait for "start of" clock tick */ + ticks = jiffies; + while (ticks == jiffies) + ; /* nothing */ + /* Go .. */ + ticks = jiffies; + do { + if (++trial_in_band == (1<<band)) { + ++band; + trial_in_band = 0; + } + __delay(lpj * band); + trials += band; + } while (ticks == jiffies); + /* + * We overshot, so retreat to a clear underestimate. Then estimate + * the largest likely undershoot. This defines our chop bounds. + */ + trials -= band; + loopadd = lpj * band; + lpj *= trials; + chop_limit = lpj >> (LPS_PREC + 1); /* * Do a binary approximation to get lpj set to - * equal one clock (up to lps_precision bits) + * equal one clock (up to LPS_PREC bits) */ - lpj >>= 1; - loopbit = lpj; - while (lps_precision-- && (loopbit >>= 1)) { - lpj |= loopbit; + while (loopadd > chop_limit) { + lpj += loopadd; ticks = jiffies; while (ticks == jiffies) - /* nothing */; + ; /* nothing */ ticks = jiffies; __delay(lpj); if (jiffies != ticks) /* longer than 1 tick */ - lpj &= ~loopbit; + lpj -= loopadd; + loopadd >>= 1; } return lpj;