Commit

Merge tag 'sched-rt-2022-10-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull preempt RT updates from Thomas Gleixner:
 "Introduce preempt_[dis|enable_nested() and use it to clean up various
  places which have open coded PREEMPT_RT conditionals.

  On PREEMPT_RT enabled kernels, spinlocks and rwlocks disable neither
  preemption nor interrupts. However, a few places depend on the implicit
  preemption/interrupt disabling of those locks, e.g. seqcount write
  sections and per-CPU statistics updates.

  PREEMPT_RT added open-coded CONFIG_PREEMPT_RT conditionals to
  disable/enable preemption in the related code paths all over the place.
  That is hard to read and does not really explain why the explicit
  disabling is necessary.

  Linus suggested providing helper functions (preempt_disable_nested() and
  preempt_enable_nested()) and using those in the affected places. On !RT
  kernels these helpers are NOPs apart from a lockdep assertion, which
  validates that preemption is actually disabled and catches call sites
  that lack it.

  Clean up the affected code paths in mm, dentry and lib"
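
The resulting call-site pattern, as a minimal before/after sketch (the seqcount
and the stats structure are placeholders, not code from this commit; the caller
is assumed to hold a spinlock, which disables preemption on !PREEMPT_RT only):

	/* Before: open-coded conditional; the spinlock held by the caller
	 * disables preemption on !PREEMPT_RT but not on PREEMPT_RT. */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();
	write_seqcount_begin(&stats->seq);
	/* ... update counters ... */
	write_seqcount_end(&stats->seq);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();

	/* After: the helper names the intent; on !PREEMPT_RT it is a NOP
	 * apart from asserting that preemption is already disabled. */
	preempt_disable_nested();
	write_seqcount_begin(&stats->seq);
	/* ... update counters ... */
	write_seqcount_end(&stats->seq);
	preempt_enable_nested();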

* tag 'sched-rt-2022-10-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  u64_stats: Streamline the implementation
  flex_proportions: Disable preemption entering the write section.
  mm/compaction: Get rid of RT ifdeffery
  mm/memcontrol: Replace the PREEMPT_RT conditionals
  mm/debug: Provide VM_WARN_ON_IRQS_ENABLED()
  mm/vmstat: Use preempt_[dis|en]able_nested()
  dentry: Use preempt_[dis|en]able_nested()
  preempt: Provide preempt_[dis|en]able_nested()
torvalds committed Oct 10, 2022
2 parents 65f109e + 44b0c29 commit 7f6dcff
Showing 10 changed files with 144 additions and 134 deletions.
13 changes: 2 additions & 11 deletions fs/dcache.c
@@ -2597,15 +2597,7 @@ EXPORT_SYMBOL(d_rehash);

static inline unsigned start_dir_add(struct inode *dir)
{
/*
* The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT
* kernels spin_lock() implicitly disables preemption, but not on
* PREEMPT_RT. So for RT it has to be done explicitly to protect
* the sequence count write side critical section against a reader
* or another writer preempting, which would result in a live lock.
*/
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
preempt_disable_nested();
for (;;) {
unsigned n = dir->i_dir_seq;
if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2618,8 +2610,7 @@ static inline void end_dir_add(struct inode *dir, unsigned int n,
wait_queue_head_t *d_wait)
{
smp_store_release(&dir->i_dir_seq, n + 2);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
preempt_enable_nested();
wake_up_all(d_wait);
}

6 changes: 6 additions & 0 deletions include/linux/mmdebug.h
@@ -94,6 +94,12 @@ void dump_mm(const struct mm_struct *mm);
#define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond)
#endif

#ifdef CONFIG_DEBUG_VM_IRQSOFF
#define VM_WARN_ON_IRQS_ENABLED() WARN_ON_ONCE(!irqs_disabled())
#else
#define VM_WARN_ON_IRQS_ENABLED() do { } while (0)
#endif

#ifdef CONFIG_DEBUG_VIRTUAL
#define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
#else
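
A hedged sketch of how the new assertion combines with the nested preempt
helpers in a statistics-update guard; the helper names are illustrative and
not taken from this commit:

	static inline void stats_update_lock(void)
	{
		/*
		 * Callers either hold an interrupt-disabling lock (!PREEMPT_RT)
		 * or have preemption disabled explicitly (PREEMPT_RT). The
		 * assertion is compiled out when DEBUG_VM_IRQSOFF=n, i.e. on RT
		 * kernels, where interrupts stay enabled in such sections.
		 */
		preempt_disable_nested();
		VM_WARN_ON_IRQS_ENABLED();
	}

	static inline void stats_update_unlock(void)
	{
		preempt_enable_nested();
	}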
42 changes: 42 additions & 0 deletions include/linux/preempt.h
@@ -421,4 +421,46 @@ static inline void migrate_enable(void) { }

#endif /* CONFIG_SMP */

/**
* preempt_disable_nested - Disable preemption inside a normally preempt disabled section
*
* Use for code which requires preemption protection inside a critical
* section which has preemption disabled implicitly on non-PREEMPT_RT
* enabled kernels, by e.g.:
* - holding a spinlock/rwlock
* - soft interrupt context
* - regular interrupt handlers
*
* On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft
* interrupt context and regular interrupt handlers are preemptible and
* only prevent migration. preempt_disable_nested() ensures that preemption
* is disabled for cases which require CPU local serialization even on
* PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP.
*
* The use cases are code sequences which are not serialized by a
* particular lock instance, e.g.:
* - seqcount write side critical sections where the seqcount is not
* associated to a particular lock and therefore the automatic
* protection mechanism does not work. This prevents a live lock
* against a preempting high priority reader.
* - RMW per CPU variable updates like vmstat.
*/
/* Macro to avoid header recursion hell vs. lockdep */
#define preempt_disable_nested() \
do { \
if (IS_ENABLED(CONFIG_PREEMPT_RT)) \
preempt_disable(); \
else \
lockdep_assert_preemption_disabled(); \
} while (0)

/**
* preempt_enable_nested - Undo the effect of preempt_disable_nested()
*/
static __always_inline void preempt_enable_nested(void)
{
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
}

#endif /* __LINUX_PREEMPT_H */
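
A usage sketch for the per-CPU RMW case named in the kerneldoc above, assuming
a hypothetical structure with a spinlock and one counter slot per CPU;
illustrative only, not part of the commit:

	static void example_count_add(struct example_counters *ec, long delta)
	{
		lockdep_assert_held(&ec->lock);

		/*
		 * On !PREEMPT_RT the spinlock already disables preemption and
		 * the helper merely asserts that; on PREEMPT_RT preemption is
		 * disabled explicitly so the CPU-local read-modify-write below
		 * cannot be preempted on this CPU.
		 */
		preempt_disable_nested();
		ec->count[smp_processor_id()] += delta;
		preempt_enable_nested();
	}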
145 changes: 64 additions & 81 deletions include/linux/u64_stats_sync.h
@@ -8,7 +8,7 @@
*
* Key points :
*
* - Use a seqcount on 32-bit SMP, only disable preemption for 32-bit UP.
* - Use a seqcount on 32-bit
* - The whole thing is a no-op on 64-bit architectures.
*
* Usage constraints:
@@ -20,7 +20,8 @@
* writer and also spin forever.
*
* 3) Write side must use the _irqsave() variant if other writers, or a reader,
* can be invoked from an IRQ context.
* can be invoked from an IRQ context. On 64bit systems this variant does not
* disable interrupts.
*
* 4) If reader fetches several counters, there is no guarantee the whole values
* are consistent w.r.t. each other (remember point #2: seqcounts are not
@@ -29,11 +30,6 @@
* 5) Readers are allowed to sleep or be preempted/interrupted: they perform
* pure reads.
*
* 6) Readers must use both u64_stats_fetch_{begin,retry}_irq() if the stats
* might be updated from a hardirq or softirq context (remember point #1:
* seqcounts are not used for UP kernels). 32-bit UP stat readers could read
* corrupted 64-bit values otherwise.
*
* Usage :
*
* Stats producer (writer) should use following template granted it already got
@@ -66,7 +62,7 @@
#include <linux/seqlock.h>

struct u64_stats_sync {
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
#if BITS_PER_LONG == 32
seqcount_t seq;
#endif
};
@@ -98,7 +94,22 @@ static inline void u64_stats_inc(u64_stats_t *p)
local64_inc(&p->v);
}

#else
static inline void u64_stats_init(struct u64_stats_sync *syncp) { }
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp) { }
static inline void __u64_stats_update_end(struct u64_stats_sync *syncp) { }
static inline unsigned long __u64_stats_irqsave(void) { return 0; }
static inline void __u64_stats_irqrestore(unsigned long flags) { }
static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
return 0;
}
static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
return false;
}

#else /* 64 bit */

typedef struct {
u64 v;
@@ -123,123 +134,95 @@ static inline void u64_stats_inc(u64_stats_t *p)
{
p->v++;
}
#endif

#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq)
#else
static inline void u64_stats_init(struct u64_stats_sync *syncp)
{
seqcount_init(&syncp->seq);
}
#endif

static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
preempt_disable_nested();
write_seqcount_begin(&syncp->seq);
#endif
}

static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
static inline void __u64_stats_update_end(struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
write_seqcount_end(&syncp->seq);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
#endif
preempt_enable_nested();
}

static inline unsigned long
u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
static inline unsigned long __u64_stats_irqsave(void)
{
unsigned long flags = 0;
unsigned long flags;

#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_disable();
else
local_irq_save(flags);
write_seqcount_begin(&syncp->seq);
#endif
local_irq_save(flags);
return flags;
}

static inline void
u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
unsigned long flags)
static inline void __u64_stats_irqrestore(unsigned long flags)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
write_seqcount_end(&syncp->seq);
if (IS_ENABLED(CONFIG_PREEMPT_RT))
preempt_enable();
else
local_irq_restore(flags);
#endif
local_irq_restore(flags);
}

static inline unsigned int __u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
return read_seqcount_begin(&syncp->seq);
#else
return 0;
#endif
}

static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
preempt_disable();
#endif
return __u64_stats_fetch_begin(syncp);
return read_seqcount_retry(&syncp->seq, start);
}
#endif /* !64 bit */

static inline bool __u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && (defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT))
return read_seqcount_retry(&syncp->seq, start);
#else
return false;
#endif
__u64_stats_update_begin(syncp);
}

static inline void u64_stats_update_end(struct u64_stats_sync *syncp)
{
__u64_stats_update_end(syncp);
}

static inline unsigned long u64_stats_update_begin_irqsave(struct u64_stats_sync *syncp)
{
unsigned long flags = __u64_stats_irqsave();

__u64_stats_update_begin(syncp);
return flags;
}

static inline void u64_stats_update_end_irqrestore(struct u64_stats_sync *syncp,
unsigned long flags)
{
__u64_stats_update_end(syncp);
__u64_stats_irqrestore(flags);
}

static inline unsigned int u64_stats_fetch_begin(const struct u64_stats_sync *syncp)
{
return __u64_stats_fetch_begin(syncp);
}

static inline bool u64_stats_fetch_retry(const struct u64_stats_sync *syncp,
unsigned int start)
{
#if BITS_PER_LONG == 32 && (!defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT))
preempt_enable();
#endif
return __u64_stats_fetch_retry(syncp, start);
}

/*
* In case irq handlers can update u64 counters, readers can use following helpers
* - SMP 32bit arches use seqcount protection, irq safe.
* - UP 32bit must disable irqs.
* - 64bit have no problem atomically reading u64 values, irq safe.
*/
/* Obsolete interfaces */
static inline unsigned int u64_stats_fetch_begin_irq(const struct u64_stats_sync *syncp)
{
#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
preempt_disable();
#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
local_irq_disable();
#endif
return __u64_stats_fetch_begin(syncp);
return u64_stats_fetch_begin(syncp);
}

static inline bool u64_stats_fetch_retry_irq(const struct u64_stats_sync *syncp,
unsigned int start)
{
#if BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT_RT)
preempt_enable();
#elif BITS_PER_LONG == 32 && !defined(CONFIG_SMP)
local_irq_enable();
#endif
return __u64_stats_fetch_retry(syncp, start);
return u64_stats_fetch_retry(syncp, start);
}

#endif /* _LINUX_U64_STATS_SYNC_H */
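
The producer/consumer template referred to in the (truncated) header comment
above reduces to roughly the following sketch with the streamlined interfaces;
the struct and function names are illustrative:

	struct ifstats {
		u64_stats_t		packets;
		u64_stats_t		bytes;
		struct u64_stats_sync	syncp;	/* set up with u64_stats_init() */
	};

	/* Writer: serialization against other writers (per-CPU data, a lock,
	 * or BH/IRQ context rules) is the caller's responsibility. */
	static void ifstats_add(struct ifstats *s, unsigned int len)
	{
		u64_stats_update_begin(&s->syncp);
		u64_stats_inc(&s->packets);
		u64_stats_add(&s->bytes, len);
		u64_stats_update_end(&s->syncp);
	}

	/* Reader: retries if it raced with a writer on 32-bit; the retry is
	 * always false on 64-bit, where the seqcount does not exist. */
	static void ifstats_read(const struct ifstats *s, u64 *packets, u64 *bytes)
	{
		unsigned int start;

		do {
			start = u64_stats_fetch_begin(&s->syncp);
			*packets = u64_stats_read(&s->packets);
			*bytes = u64_stats_read(&s->bytes);
		} while (u64_stats_fetch_retry(&s->syncp, start));
	}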
3 changes: 3 additions & 0 deletions lib/Kconfig.debug
@@ -805,6 +805,9 @@ config ARCH_HAS_DEBUG_VM_PGTABLE
An architecture should select this when it can successfully
build and run DEBUG_VM_PGTABLE.

config DEBUG_VM_IRQSOFF
def_bool DEBUG_VM && !PREEMPT_RT

config DEBUG_VM
bool "Debug VM"
depends on DEBUG_KERNEL
2 changes: 2 additions & 0 deletions lib/flex_proportions.c
@@ -70,13 +70,15 @@ bool fprop_new_period(struct fprop_global *p, int periods)
*/
if (events <= 1)
return false;
preempt_disable_nested();
write_seqcount_begin(&p->sequence);
if (periods < 64)
events -= events >> periods;
/* Use addition to avoid losing events happening between sum and set */
percpu_counter_add(&p->events, -events);
p->period += periods;
write_seqcount_end(&p->sequence);
preempt_enable_nested();

return true;
}
6 changes: 6 additions & 0 deletions mm/Kconfig
@@ -579,6 +579,12 @@ config COMPACTION
it and then we would be really interested to hear about that at
[email protected].

config COMPACT_UNEVICTABLE_DEFAULT
int
depends on COMPACTION
default 0 if PREEMPT_RT
default 1

#
# support for free page reporting
config PAGE_REPORTING
6 changes: 1 addition & 5 deletions mm/compaction.c
@@ -1727,11 +1727,7 @@ typedef enum {
* Allow userspace to control policy on scanning the unevictable LRU for
* compactable pages.
*/
#ifdef CONFIG_PREEMPT_RT
int sysctl_compact_unevictable_allowed __read_mostly = 0;
#else
int sysctl_compact_unevictable_allowed __read_mostly = 1;
#endif
int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNEVICTABLE_DEFAULT;

static inline void
update_fast_start_pfn(struct compact_control *cc, unsigned long pfn)
