Skip to content

Commit

Permalink
ntp: Fix leap-second hrtimer livelock
Browse files Browse the repository at this point in the history
Since commit 7dffa3c the ntp
subsystem has used an hrtimer for triggering the leapsecond
adjustment. However, this can cause a potential livelock.

Thomas diagnosed this as the following pattern:
CPU 0                                                    CPU 1
do_adjtimex()
  spin_lock_irq(&ntp_lock);
    process_adjtimex_modes();				 timer_interrupt()
      process_adj_status();                                do_timer()
        ntp_start_leap_timer();                             write_lock(&xtime_lock);
          hrtimer_start();                                  update_wall_time();
             hrtimer_reprogram();                            ntp_tick_length()
               tick_program_event()                            spin_lock(&ntp_lock);
                 clockevents_program_event()
		   ktime_get()
                     seq = req_seqbegin(xtime_lock);

This patch tries to avoid the problem by reverting back to not using
an hrtimer to inject leapseconds, and instead we handle the leapsecond
processing in the second_overflow() function.

The downside to this change is that on systems that support highres
timers, the leap second processing will occur on a HZ tick boundary,
(ie: ~1-10ms, depending on HZ)  after the leap second instead of
possibly sooner (~34us in my tests w/ x86_64 lapic).

This patch applies on top of tip/timers/core.

CC: Sasha Levin <[email protected]>
CC: Thomas Gleixner <[email protected]>
Reported-by: Sasha Levin <[email protected]>
Diagnoised-by: Thomas Gleixner <[email protected]>
Tested-by: Sasha Levin <[email protected]>
Signed-off-by: John Stultz <[email protected]>
  • Loading branch information
johnstultz-work committed Mar 23, 2012
1 parent 57779dc commit 6b43ae8
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 102 deletions.
2 changes: 1 addition & 1 deletion include/linux/timex.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ extern void ntp_clear(void);
/* Returns how long ticks are at present, in ns / 2^NTP_SCALE_SHIFT. */
extern u64 ntp_tick_length(void);

extern void second_overflow(void);
extern int second_overflow(unsigned long secs);
extern int do_adjtimex(struct timex *);
extern void hardpps(const struct timespec *, const struct timespec *);

Expand Down
128 changes: 41 additions & 87 deletions kernel/time/ntp.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ unsigned long tick_nsec;
static u64 tick_length;
static u64 tick_length_base;

static struct hrtimer leap_timer;

#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
Expand Down Expand Up @@ -381,70 +379,63 @@ u64 ntp_tick_length(void)


/*
* Leap second processing. If in leap-insert state at the end of the
* day, the system clock is set back one second; if in leap-delete
* state, the system clock is set ahead one second.
* this routine handles the overflow of the microsecond field
*
* The tricky bits of code to handle the accurate clock support
* were provided by Dave Mills ([email protected]) of NTP fame.
* They were originally developed for SUN and DEC kernels.
* All the kudos should go to Dave for this stuff.
*
* Also handles leap second processing, and returns leap offset
*/
static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
int second_overflow(unsigned long secs)
{
enum hrtimer_restart res = HRTIMER_NORESTART;
unsigned long flags;
s64 delta;
int leap = 0;
unsigned long flags;

spin_lock_irqsave(&ntp_lock, flags);

/*
* Leap second processing. If in leap-insert state at the end of the
* day, the system clock is set back one second; if in leap-delete
* state, the system clock is set ahead one second.
*/
switch (time_state) {
case TIME_OK:
if (time_status & STA_INS)
time_state = TIME_INS;
else if (time_status & STA_DEL)
time_state = TIME_DEL;
break;
case TIME_INS:
leap = -1;
time_state = TIME_OOP;
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
res = HRTIMER_RESTART;
if (secs % 86400 == 0) {
leap = -1;
time_state = TIME_OOP;
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
}
break;
case TIME_DEL:
leap = 1;
time_tai--;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
if ((secs + 1) % 86400 == 0) {
leap = 1;
time_tai--;
time_state = TIME_WAIT;
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
}
break;
case TIME_OOP:
time_tai++;
time_state = TIME_WAIT;
/* fall through */
break;

case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
}
spin_unlock_irqrestore(&ntp_lock, flags);

/*
* We have to call this outside of the ntp_lock to keep
* the proper locking hierarchy
*/
if (leap)
timekeeping_leap_insert(leap);

return res;
}

/*
* this routine handles the overflow of the microsecond field
*
* The tricky bits of code to handle the accurate clock support
* were provided by Dave Mills ([email protected]) of NTP fame.
* They were originally developed for SUN and DEC kernels.
* All the kudos should go to Dave for this stuff.
*/
void second_overflow(void)
{
s64 delta;
unsigned long flags;

spin_lock_irqsave(&ntp_lock, flags);

/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
Expand Down Expand Up @@ -481,8 +472,13 @@ void second_overflow(void)
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
<< NTP_SCALE_SHIFT;
time_adjust = 0;



out:
spin_unlock_irqrestore(&ntp_lock, flags);

return leap;
}

#ifdef CONFIG_GENERIC_CMOS_UPDATE
Expand Down Expand Up @@ -544,27 +540,6 @@ static void notify_cmos_timer(void)
static inline void notify_cmos_timer(void) { }
#endif

/*
* Start the leap seconds timer:
*/
static inline void ntp_start_leap_timer(struct timespec *ts)
{
long now = ts->tv_sec;

if (time_status & STA_INS) {
time_state = TIME_INS;
now += 86400 - now % 86400;
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);

return;
}

if (time_status & STA_DEL) {
time_state = TIME_DEL;
now += 86400 - (now + 1) % 86400;
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
}
}

/*
* Propagate a new txc->status value into the NTP state:
Expand All @@ -589,22 +564,6 @@ static inline void process_adj_status(struct timex *txc, struct timespec *ts)
time_status &= STA_RONLY;
time_status |= txc->status & ~STA_RONLY;

switch (time_state) {
case TIME_OK:
ntp_start_leap_timer(ts);
break;
case TIME_INS:
case TIME_DEL:
time_state = TIME_OK;
ntp_start_leap_timer(ts);
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
case TIME_OOP:
hrtimer_restart(&leap_timer);
break;
}
}
/*
* Called with the xtime lock held, so we can access and modify
Expand Down Expand Up @@ -686,9 +645,6 @@ int do_adjtimex(struct timex *txc)
(txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ))
return -EINVAL;

if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
hrtimer_cancel(&leap_timer);
}

if (txc->modes & ADJ_SETOFFSET) {
Expand Down Expand Up @@ -1010,6 +966,4 @@ __setup("ntp_tick_adj=", ntp_tick_adj_setup);
void __init ntp_init(void)
{
ntp_clear();
hrtimer_init(&leap_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
leap_timer.function = ntp_leap_second;
}
20 changes: 6 additions & 14 deletions kernel/time/timekeeping.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,18 +184,6 @@ static void timekeeping_update(bool clearntp)
}


void timekeeping_leap_insert(int leapsecond)
{
unsigned long flags;

write_seqlock_irqsave(&timekeeper.lock, flags);
timekeeper.xtime.tv_sec += leapsecond;
timekeeper.wall_to_monotonic.tv_sec -= leapsecond;
timekeeping_update(false);
write_sequnlock_irqrestore(&timekeeper.lock, flags);

}

/**
* timekeeping_forward_now - update clock to the current time
*
Expand Down Expand Up @@ -969,9 +957,11 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)

timekeeper.xtime_nsec += timekeeper.xtime_interval << shift;
while (timekeeper.xtime_nsec >= nsecps) {
int leap;
timekeeper.xtime_nsec -= nsecps;
timekeeper.xtime.tv_sec++;
second_overflow();
leap = second_overflow(timekeeper.xtime.tv_sec);
timekeeper.xtime.tv_sec += leap;
}

/* Accumulate raw time */
Expand Down Expand Up @@ -1082,9 +1072,11 @@ static void update_wall_time(void)
* xtime.tv_nsec isn't larger then NSEC_PER_SEC
*/
if (unlikely(timekeeper.xtime.tv_nsec >= NSEC_PER_SEC)) {
int leap;
timekeeper.xtime.tv_nsec -= NSEC_PER_SEC;
timekeeper.xtime.tv_sec++;
second_overflow();
leap = second_overflow(timekeeper.xtime.tv_sec);
timekeeper.xtime.tv_sec += leap;
}

timekeeping_update(false);
Expand Down

0 comments on commit 6b43ae8

Please sign in to comment.