sched,perf: Fix periodic timers
In the two commits below (see Fixes) we have periodic timers that can
stop themselves when they're no longer required, but need to be
(re)started when their idle condition changes.

A further complication is that we want the timer handler to always do
the forward, so that it always deals correctly with overruns; and we
do not want to race such that the handler has already decided to stop,
but the (external) restart sees the timer as still active and we end
up with a 'lost' timer.
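
As a minimal sketch of that pattern (struct my_ctx and my_work_pending()
are hypothetical stand-ins, not the patched code): the handler does the
forward itself via hrtimer_forward_now() so overruns are accounted for,
and stops itself when idle:

	#include <linux/hrtimer.h>

	/* Hypothetical context; stands in for perf_cpu_context, cfs_bandwidth, ... */
	struct my_ctx {
		struct hrtimer	timer;
		ktime_t		period;
	};

	static bool my_work_pending(struct my_ctx *ctx);	/* hypothetical idle test */

	static enum hrtimer_restart my_period_handler(struct hrtimer *t)
	{
		struct my_ctx *ctx = container_of(t, struct my_ctx, timer);

		/* No more work: stop ourselves; an external restart re-arms us. */
		if (!my_work_pending(ctx))
			return HRTIMER_NORESTART;

		/* Forward in the handler so overruns are dealt with correctly. */
		hrtimer_forward_now(t, ctx->period);
		return HRTIMER_RESTART;
	}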

The problem with the current code is that the re-start can come before
the callback does the forward, at which point the forward from the
callback will WARN about forwarding an enqueued timer.

Now, conceptually it's easy to detect whether you're before or after
the forward by comparing the expiration time against the current time.
Of course, that's expensive (and racy) because we don't have the
current time.

Alternatively one could cache this state inside the timer, but then
everybody pays the overhead of maintaining this extra state, and that
is undesired.

The only other option that I could see is the external timer_active
variable, which I tried to kill before. I would love a nicer interface
for this seemingly simple 'problem', but alas.
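
The resulting pattern, as a sketch with the same hypothetical names as
above (the diffs below realize it with cpuctx->hrtimer_active,
cfs_b->period_active and rt_b->rt_period_active): the handler clears
the flag under a lock when it stops, and the restart path only
(re)starts the timer when the flag is clear, so exactly one side does
the forward:

	#include <linux/hrtimer.h>
	#include <linux/spinlock.h>

	struct my_ctx {				/* hypothetical, as above */
		raw_spinlock_t	lock;
		unsigned int	active;
		struct hrtimer	timer;
		ktime_t		period;
	};

	static bool my_work_pending(struct my_ctx *ctx);	/* hypothetical */

	/* Handler: decide stop vs. forward, publish the decision under the lock. */
	static enum hrtimer_restart my_period_handler(struct hrtimer *t)
	{
		struct my_ctx *ctx = container_of(t, struct my_ctx, timer);
		int restart;

		raw_spin_lock(&ctx->lock);
		restart = my_work_pending(ctx);
		if (restart)
			hrtimer_forward_now(t, ctx->period);	/* we own the forward */
		else
			ctx->active = 0;			/* we are now stopped */
		raw_spin_unlock(&ctx->lock);

		return restart ? HRTIMER_RESTART : HRTIMER_NORESTART;
	}

	/* Restart: only (re)start when the handler has really stopped. */
	static void my_timer_restart(struct my_ctx *ctx)
	{
		unsigned long flags;

		raw_spin_lock_irqsave(&ctx->lock, flags);
		if (!ctx->active) {
			ctx->active = 1;
			hrtimer_forward_now(&ctx->timer, ctx->period);
			hrtimer_start_expires(&ctx->timer, HRTIMER_MODE_ABS_PINNED);
		}
		raw_spin_unlock_irqrestore(&ctx->lock, flags);
	}

Because the flag only changes under the lock, the restart either sees
the handler's decision to stop (and may then safely forward and start
the timer) or sees it still active (and does nothing), so the timer can
be neither lost nor forwarded while enqueued.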

Fixes: 272325c ("perf: Fix mux_interval hrtimer wreckage")
Fixes: 77a4d1a ("sched: Cleanup bandwidth timers")
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: [email protected]
Cc: Sasha Levin <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Peter Zijlstra authored and KAGA-KOKO committed May 18, 2015
1 parent c78c881 commit 4cfafd3
Showing 6 changed files with 42 additions and 33 deletions.
4 changes: 4 additions & 0 deletions include/linux/perf_event.h
@@ -566,8 +566,12 @@ struct perf_cpu_context {
 	struct perf_event_context	*task_ctx;
 	int				active_oncpu;
 	int				exclusive;
+
+	raw_spinlock_t			hrtimer_lock;
 	struct hrtimer			hrtimer;
 	ktime_t				hrtimer_interval;
+	unsigned int			hrtimer_active;
+
 	struct pmu			*unique_pmu;
 	struct perf_cgroup		*cgrp;
 };
29 changes: 16 additions & 13 deletions kernel/events/core.c
@@ -752,24 +752,21 @@ perf_cgroup_mark_enabled(struct perf_event *event,
 static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr)
 {
 	struct perf_cpu_context *cpuctx;
-	enum hrtimer_restart ret = HRTIMER_NORESTART;
 	int rotations = 0;
 
 	WARN_ON(!irqs_disabled());
 
 	cpuctx = container_of(hr, struct perf_cpu_context, hrtimer);
-
 	rotations = perf_rotate_context(cpuctx);
 
-	/*
-	 * arm timer if needed
-	 */
-	if (rotations) {
+	raw_spin_lock(&cpuctx->hrtimer_lock);
+	if (rotations)
 		hrtimer_forward_now(hr, cpuctx->hrtimer_interval);
-		ret = HRTIMER_RESTART;
-	}
-
-	return ret;
+	else
+		cpuctx->hrtimer_active = 0;
+	raw_spin_unlock(&cpuctx->hrtimer_lock);
+
+	return rotations ? HRTIMER_RESTART : HRTIMER_NORESTART;
 }
 
 static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
@@ -792,23 +789,29 @@ static void __perf_mux_hrtimer_init(struct perf_cpu_context *cpuctx, int cpu)
 
 	cpuctx->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * interval);
 
-	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
+	raw_spin_lock_init(&cpuctx->hrtimer_lock);
+	hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	timer->function = perf_mux_hrtimer_handler;
 }
 
 static int perf_mux_hrtimer_restart(struct perf_cpu_context *cpuctx)
 {
 	struct hrtimer *timer = &cpuctx->hrtimer;
 	struct pmu *pmu = cpuctx->ctx.pmu;
+	unsigned long flags;
 
 	/* not for SW PMU */
 	if (pmu->task_ctx_nr == perf_sw_context)
 		return 0;
 
-	if (hrtimer_is_queued(timer))
-		return 0;
+	raw_spin_lock_irqsave(&cpuctx->hrtimer_lock, flags);
+	if (!cpuctx->hrtimer_active) {
+		cpuctx->hrtimer_active = 1;
+		hrtimer_forward_now(timer, cpuctx->hrtimer_interval);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+	}
+	raw_spin_unlock_irqrestore(&cpuctx->hrtimer_lock, flags);
 
-	hrtimer_start(timer, cpuctx->hrtimer_interval, HRTIMER_MODE_REL_PINNED);
 	return 0;
 }

12 changes: 0 additions & 12 deletions kernel/sched/core.c
@@ -90,18 +90,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
-void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)
-{
-	/*
-	 * Do not forward the expiration time of active timers;
-	 * we do not want to loose an overrun.
-	 */
-	if (!hrtimer_active(period_timer))
-		hrtimer_forward_now(period_timer, period);
-
-	hrtimer_start_expires(period_timer, HRTIMER_MODE_ABS_PINNED);
-}
-
 DEFINE_MUTEX(sched_domains_mutex);
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

17 changes: 13 additions & 4 deletions kernel/sched/fair.c
@@ -3870,8 +3870,9 @@ static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
 	if (runtime_refresh_within(cfs_b, min_left))
 		return;
 
-	start_bandwidth_timer(&cfs_b->slack_timer,
-			ns_to_ktime(cfs_bandwidth_slack_period));
+	hrtimer_start(&cfs_b->slack_timer,
+			ns_to_ktime(cfs_bandwidth_slack_period),
+			HRTIMER_MODE_REL);
 }
 
 /* we know any runtime found here is valid as update_curr() precedes return */
@@ -4012,6 +4013,8 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 
 		idle = do_sched_cfs_period_timer(cfs_b, overrun);
 	}
+	if (idle)
+		cfs_b->period_active = 0;
 	raw_spin_unlock(&cfs_b->lock);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -4025,7 +4028,7 @@ void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 	cfs_b->period = ns_to_ktime(default_cfs_period());
 
 	INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
-	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	cfs_b->period_timer.function = sched_cfs_period_timer;
 	hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	cfs_b->slack_timer.function = sched_cfs_slack_timer;
@@ -4039,7 +4042,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 
 void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
 {
-	start_bandwidth_timer(&cfs_b->period_timer, cfs_b->period);
+	lockdep_assert_held(&cfs_b->lock);
+
+	if (!cfs_b->period_active) {
+		cfs_b->period_active = 1;
+		hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
+		hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
+	}
 }
 
 static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
8 changes: 7 additions & 1 deletion kernel/sched/rt.c
@@ -31,6 +31,8 @@ static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
 		idle = do_sched_rt_period_timer(rt_b, overrun);
 		raw_spin_lock(&rt_b->rt_runtime_lock);
 	}
+	if (idle)
+		rt_b->rt_period_active = 0;
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 
 	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
@@ -54,7 +56,11 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 		return;
 
 	raw_spin_lock(&rt_b->rt_runtime_lock);
-	start_bandwidth_timer(&rt_b->rt_period_timer, rt_b->rt_period);
+	if (!rt_b->rt_period_active) {
+		rt_b->rt_period_active = 1;
+		hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
+		hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
+	}
 	raw_spin_unlock(&rt_b->rt_runtime_lock);
 }

5 changes: 2 additions & 3 deletions kernel/sched/sched.h
@@ -131,6 +131,7 @@ struct rt_bandwidth {
 	ktime_t			rt_period;
 	u64			rt_runtime;
 	struct hrtimer		rt_period_timer;
+	unsigned int		rt_period_active;
 };
 
 void __dl_clear_params(struct task_struct *p);
@@ -215,7 +216,7 @@ struct cfs_bandwidth {
 	s64 hierarchical_quota;
 	u64 runtime_expires;
 
-	int idle;
+	int idle, period_active;
 	struct hrtimer period_timer, slack_timer;
 	struct list_head throttled_cfs_rq;
 
@@ -1406,8 +1407,6 @@ static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta) { }
 static inline void sched_avg_update(struct rq *rq) { }
 #endif
 
-extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);
-
 /*
  * __task_rq_lock - lock the rq @p resides on.
  */
