Skip to content

Commit

Permalink
sched, x86: clean up hrtick implementation
Browse files Browse the repository at this point in the history
Random uvesafb failures were reported against Gentoo:

  http://bugs.gentoo.org/show_bug.cgi?id=222799

and Mihai Moldovan bisected it back to:

> 8f4d37e is first bad commit
> commit 8f4d37e
> Author: Peter Zijlstra <[email protected]>
> Date:   Fri Jan 25 21:08:29 2008 +0100
>
>    sched: high-res preemption tick

Linus suspected it to be hrtick + vm86 interaction and observed:

> Btw, Peter, Ingo: I think that commit is doing bad things. They aren't
> _incorrect_ per se, but they are definitely bad.
>
> Why?
>
> Using random _TIF_WORK_MASK flags is really impolite for doing
> "scheduling" work. There's a reason that arch/x86/kernel/entry_32.S
> special-cases the _TIF_NEED_RESCHED flag: we don't want to exit out of
> vm86 mode unnecessarily.
>
> See the "work_notifysig_v86" label, and how it does that
> "save_v86_state()" thing etc etc.

Right, I never liked having to fiddle with those TIF flags. Initially I
needed it because the hrtimer base lock could not nest inside the rq lock.
That, however, is fixed these days.

Currently the only reason left to fiddle with the TIF flags is remote
wakeups. We cannot program a remote cpu's hrtimer. I've been thinking
about using the new and improved IPI function call stuff to implement
hrtimer_start_on().

However, that requires smp_call_function_single(.wait=0) to work from
interrupt context - /me looks at the latest series from Jens - and yes,
that does seem to be supported, good.

Here's a stab at cleaning this stuff up ...

Mihai reported test success as well.

Signed-off-by: Peter Zijlstra <[email protected]>
Tested-by: Mihai Moldovan <[email protected]>
Cc: Michal Januszewski <[email protected]>
Cc: Antonino Daplas <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
Peter Zijlstra authored and Ingo Molnar committed Jul 20, 2008
1 parent 577b4a5 commit 3165651
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 155 deletions.
3 changes: 0 additions & 3 deletions arch/x86/kernel/signal_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -667,8 +667,5 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);

if (thread_info_flags & _TIF_HRTICK_RESCHED)
hrtick_resched();

clear_thread_flag(TIF_IRET);
}
3 changes: 0 additions & 3 deletions arch/x86/kernel/signal_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -502,9 +502,6 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);

if (thread_info_flags & _TIF_HRTICK_RESCHED)
hrtick_resched();
}

void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
Expand Down
4 changes: 1 addition & 3 deletions include/asm-x86/thread_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* 32bit process */
#define TIF_FORK 18 /* ret_from_fork */
Expand All @@ -108,7 +107,6 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
Expand All @@ -132,7 +130,7 @@ struct thread_info {

/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED)
(_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY)

/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
Expand Down
2 changes: 1 addition & 1 deletion kernel/Kconfig.hz
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,4 @@ config HZ
default 1000 if HZ_1000

config SCHED_HRTICK
def_bool HIGH_RES_TIMERS && X86
def_bool HIGH_RES_TIMERS
202 changes: 60 additions & 142 deletions kernel/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -571,8 +571,10 @@ struct rq {
#endif

#ifdef CONFIG_SCHED_HRTICK
unsigned long hrtick_flags;
ktime_t hrtick_expire;
#ifdef CONFIG_SMP
int hrtick_csd_pending;
struct call_single_data hrtick_csd;
#endif
struct hrtimer hrtick_timer;
#endif

Expand Down Expand Up @@ -983,13 +985,6 @@ static struct rq *this_rq_lock(void)
return rq;
}

static void __resched_task(struct task_struct *p, int tif_bit);

static inline void resched_task(struct task_struct *p)
{
__resched_task(p, TIF_NEED_RESCHED);
}

#ifdef CONFIG_SCHED_HRTICK
/*
* Use HR-timers to deliver accurate preemption points.
Expand All @@ -1001,25 +996,6 @@ static inline void resched_task(struct task_struct *p)
* When we get rescheduled we reprogram the hrtick_timer outside of the
* rq->lock.
*/
static inline void resched_hrt(struct task_struct *p)
{
__resched_task(p, TIF_HRTICK_RESCHED);
}

static inline void resched_rq(struct rq *rq)
{
unsigned long flags;

spin_lock_irqsave(&rq->lock, flags);
resched_task(rq->curr);
spin_unlock_irqrestore(&rq->lock, flags);
}

enum {
HRTICK_SET, /* re-programm hrtick_timer */
HRTICK_RESET, /* not a new slice */
HRTICK_BLOCK, /* stop hrtick operations */
};

/*
* Use hrtick when:
Expand All @@ -1030,72 +1006,17 @@ static inline int hrtick_enabled(struct rq *rq)
{
if (!sched_feat(HRTICK))
return 0;
if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
if (!cpu_online(cpu_of(rq)))
return 0;
return hrtimer_is_hres_active(&rq->hrtick_timer);
}

/*
* Called to set the hrtick timer state.
*
* called with rq->lock held and irqs disabled
*/
static void hrtick_start(struct rq *rq, u64 delay, int reset)
{
assert_spin_locked(&rq->lock);

/*
* preempt at: now + delay
*/
rq->hrtick_expire =
ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
/*
* indicate we need to program the timer
*/
__set_bit(HRTICK_SET, &rq->hrtick_flags);
if (reset)
__set_bit(HRTICK_RESET, &rq->hrtick_flags);

/*
* New slices are called from the schedule path and don't need a
* forced reschedule.
*/
if (reset)
resched_hrt(rq->curr);
}

/*
 * Stop the hrtick timer of @rq.
 *
 * hrtimer_cancel() is only invoked when hrtimer_active() reports the
 * timer as active; otherwise this function does nothing.
 */
static void hrtick_clear(struct rq *rq)
{
if (hrtimer_active(&rq->hrtick_timer))
hrtimer_cancel(&rq->hrtick_timer);
}

/*
* Update the timer from the possible pending state.
*/
static void hrtick_set(struct rq *rq)
{
ktime_t time;
int set, reset;
unsigned long flags;

WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());

spin_lock_irqsave(&rq->lock, flags);
set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
time = rq->hrtick_expire;
clear_thread_flag(TIF_HRTICK_RESCHED);
spin_unlock_irqrestore(&rq->lock, flags);

if (set) {
hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
if (reset && !hrtimer_active(&rq->hrtick_timer))
resched_rq(rq);
} else
hrtick_clear(rq);
}

/*
* High-resolution timer tick.
* Runs from hardirq context with interrupts disabled.
Expand All @@ -1115,27 +1036,37 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
}

#ifdef CONFIG_SMP
static void hotplug_hrtick_disable(int cpu)
/*
* called from hardirq (IPI) context
*/
static void __hrtick_start(void *arg)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;

spin_lock_irqsave(&rq->lock, flags);
rq->hrtick_flags = 0;
__set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
spin_unlock_irqrestore(&rq->lock, flags);
struct rq *rq = arg;

hrtick_clear(rq);
spin_lock(&rq->lock);
hrtimer_restart(&rq->hrtick_timer);
rq->hrtick_csd_pending = 0;
spin_unlock(&rq->lock);
}

static void hotplug_hrtick_enable(int cpu)
/*
* Called to set the hrtick timer state.
*
* called with rq->lock held and irqs disabled
*/
static void hrtick_start(struct rq *rq, u64 delay)
{
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
struct hrtimer *timer = &rq->hrtick_timer;
ktime_t time = ktime_add_ns(timer->base->get_time(), delay);

spin_lock_irqsave(&rq->lock, flags);
__clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
spin_unlock_irqrestore(&rq->lock, flags);
timer->expires = time;

if (rq == this_rq()) {
hrtimer_restart(timer);
} else if (!rq->hrtick_csd_pending) {
__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
rq->hrtick_csd_pending = 1;
}
}

static int
Expand All @@ -1150,16 +1081,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DOWN_PREPARE_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
hotplug_hrtick_disable(cpu);
return NOTIFY_OK;

case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
hotplug_hrtick_enable(cpu);
hrtick_clear(cpu_rq(cpu));
return NOTIFY_OK;
}

Expand All @@ -1170,46 +1092,45 @@ static void init_hrtick(void)
{
hotcpu_notifier(hotplug_hrtick, 0);
}
#endif /* CONFIG_SMP */
#else
/*
 * Called to set the hrtick timer state (this definition sits in the #else
 * branch of the CONFIG_SMP block, i.e. the variant without the IPI path).
 *
 * Arms rq->hrtick_timer using a relative expiry: @delay is a nanosecond
 * count that is converted with ns_to_ktime() and passed to hrtimer_start()
 * with HRTIMER_MODE_REL.
 *
 * called with rq->lock held and irqs disabled
 */
static void hrtick_start(struct rq *rq, u64 delay)
{
hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
}

static void init_rq_hrtick(struct rq *rq)
static void init_hrtick(void)
{
rq->hrtick_flags = 0;
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rq->hrtick_timer.function = hrtick;
rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
}
#endif /* CONFIG_SMP */

void hrtick_resched(void)
static void init_rq_hrtick(struct rq *rq)
{
struct rq *rq;
unsigned long flags;
#ifdef CONFIG_SMP
rq->hrtick_csd_pending = 0;

if (!test_thread_flag(TIF_HRTICK_RESCHED))
return;
rq->hrtick_csd.flags = 0;
rq->hrtick_csd.func = __hrtick_start;
rq->hrtick_csd.info = rq;
#endif

local_irq_save(flags);
rq = cpu_rq(smp_processor_id());
hrtick_set(rq);
local_irq_restore(flags);
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
rq->hrtick_timer.function = hrtick;
rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
}
#else
static inline void hrtick_clear(struct rq *rq)
{
}

static inline void hrtick_set(struct rq *rq)
{
}

static inline void init_rq_hrtick(struct rq *rq)
{
}

void hrtick_resched(void)
{
}

static inline void init_hrtick(void)
{
}
Expand All @@ -1228,16 +1149,16 @@ static inline void init_hrtick(void)
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif

static void __resched_task(struct task_struct *p, int tif_bit)
static void resched_task(struct task_struct *p)
{
int cpu;

assert_spin_locked(&task_rq(p)->lock);

if (unlikely(test_tsk_thread_flag(p, tif_bit)))
if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
return;

set_tsk_thread_flag(p, tif_bit);
set_tsk_thread_flag(p, TIF_NEED_RESCHED);

cpu = task_cpu(p);
if (cpu == smp_processor_id())
Expand Down Expand Up @@ -1303,10 +1224,10 @@ void wake_up_idle_cpu(int cpu)
#endif /* CONFIG_NO_HZ */

#else /* !CONFIG_SMP */
static void __resched_task(struct task_struct *p, int tif_bit)
static void resched_task(struct task_struct *p)
{
assert_spin_locked(&task_rq(p)->lock);
set_tsk_thread_flag(p, tif_bit);
set_tsk_need_resched(p);
}
#endif /* CONFIG_SMP */

Expand Down Expand Up @@ -4395,7 +4316,7 @@ asmlinkage void __sched schedule(void)
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu, hrtick = sched_feat(HRTICK);
int cpu;

need_resched:
preempt_disable();
Expand All @@ -4410,7 +4331,7 @@ asmlinkage void __sched schedule(void)

schedule_debug(prev);

if (hrtick)
if (sched_feat(HRTICK))
hrtick_clear(rq);

/*
Expand Down Expand Up @@ -4457,9 +4378,6 @@ asmlinkage void __sched schedule(void)
} else
spin_unlock_irq(&rq->lock);

if (hrtick)
hrtick_set(rq);

if (unlikely(reacquire_kernel_lock(current) < 0))
goto need_resched_nonpreemptible;

Expand Down
5 changes: 2 additions & 3 deletions kernel/sched_fair.c
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
#ifdef CONFIG_SCHED_HRTICK
static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
{
int requeue = rq->curr == p;
struct sched_entity *se = &p->se;
struct cfs_rq *cfs_rq = cfs_rq_of(se);

Expand All @@ -899,10 +898,10 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
* Don't schedule slices shorter than 10000ns, that just
* doesn't make sense. Rely on vruntime for fairness.
*/
if (!requeue)
if (rq->curr != p)
delta = max(10000LL, delta);

hrtick_start(rq, delta, requeue);
hrtick_start(rq, delta);
}
}
#else /* !CONFIG_SCHED_HRTICK */
Expand Down

0 comments on commit 3165651

Please sign in to comment.