Merge tag 'bitmap-6.1-rc1' of https://github.com/norov/linux
Pull bitmap updates from Yury Norov:

 - Fix unsigned comparison to -1 in CPUMAP_FILE_MAX_BYTES (Phil Auld)

 - clean up the nr_cpu_ids vs nr_cpumask_bits mess (me)

   This series cleans up that mess and adds a new config option,
   FORCE_NR_CPUS, that allows optimizing the cpumask subsystem when the
   number of CPUs is known at compile time.
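
   With the option enabled, nr_cpu_ids becomes a compile-time constant,
   as the include/linux/cpumask.h hunk below shows:

	#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
	#define nr_cpu_ids ((unsigned int)NR_CPUS)
	#else
	extern unsigned int nr_cpu_ids;
	#endif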

 - optimize find_bit() functions (me)

   Reworks the find_bit() functions based on the new
   FIND_{FIRST,NEXT}_BIT() macros.

 - add find_nth_bit() (me)

   Adds find_nth_bit(), which is ~70 times faster than bit-counting with
   a for_each() loop:

	for_each_set_bit(bit, mask, size)
		if (n-- == 0)
			return bit;
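
   With the new helper, the same lookup becomes a single call (a sketch;
   mask, size and n are the variables from the loop above):

	bit = find_nth_bit(mask, size, n);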

   Also adds bitmap_weight_and() to let people replace this pattern:

	tmp = bitmap_alloc(nbits);
	bitmap_and(tmp, map1, map2, nbits);
	weight = bitmap_weight(tmp, nbits);
	bitmap_free(tmp);

   with a single bitmap_weight_and() call.
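
   For example, the four-line pattern above collapses to (sketch):

	weight = bitmap_weight_and(map1, map2, nbits);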

 - repair cpumask_check() (me)

   After switching cpumask to use nr_cpu_ids, cpumask_check() started
   generating many false-positive warnings. This series fixes it.

 - Add for_each_cpu_andnot() (Valentin Schneider)

   Extends the API with one more function and applies it in sched/core.
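
   A minimal usage sketch (do_something() is a placeholder):

	for_each_cpu_andnot(cpu, mask1, mask2)
		do_something(cpu);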

* tag 'bitmap-6.1-rc1' of https://github.com/norov/linux: (28 commits)
  sched/core: Merge cpumask_andnot()+for_each_cpu() into for_each_cpu_andnot()
  lib/test_cpumask: Add for_each_cpu_and(not) tests
  cpumask: Introduce for_each_cpu_andnot()
  lib/find_bit: Introduce find_next_andnot_bit()
  cpumask: fix checking valid cpu range
  lib/bitmap: add tests for for_each() loops
  lib/find: optimize for_each() macros
  lib/bitmap: introduce for_each_set_bit_wrap() macro
  lib/find_bit: add find_next{,_and}_bit_wrap
  cpumask: switch for_each_cpu{,_not} to use for_each_bit()
  net: fix cpu_max_bits_warn() usage in netif_attrmask_next{,_and}
  cpumask: add cpumask_nth_{,and,andnot}
  lib/bitmap: remove bitmap_ord_to_pos
  lib/bitmap: add tests for find_nth_bit()
  lib: add find_nth{,_and,_andnot}_bit()
  lib/bitmap: add bitmap_weight_and()
  lib/bitmap: don't call __bitmap_weight() in kernel code
  tools: sync find_bit() implementation
  lib/find_bit: optimize find_next_bit() functions
  lib/find_bit: create find_first_zero_bit_le()
  ...
torvalds committed Oct 10, 2022
2 parents cdf072a + 585463f commit d4013bc
Showing 23 changed files with 1,035 additions and 369 deletions.
2 changes: 1 addition & 1 deletion arch/loongarch/kernel/setup.c
@@ -336,7 +336,7 @@ static void __init prefill_possible_map(void)
for (; i < NR_CPUS; i++)
set_cpu_possible(i, false);

nr_cpu_ids = possible;
set_nr_cpu_ids(possible);
}
#endif

2 changes: 1 addition & 1 deletion arch/mips/kernel/setup.c
@@ -751,7 +751,7 @@ static void __init prefill_possible_map(void)
for (; i < NR_CPUS; i++)
set_cpu_possible(i, false);

nr_cpu_ids = possible;
set_nr_cpu_ids(possible);
}
#else
static inline void prefill_possible_map(void) {}
4 changes: 4 additions & 0 deletions arch/powerpc/kernel/head_64.S
@@ -393,8 +393,12 @@ generic_secondary_common_init:
#else
LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointer */
ld r8,0(r8) /* Get base vaddr of array */
#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
LOAD_REG_IMMEDIATE(r7, NR_CPUS)
#else
LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
lwz r7,0(r7) /* also the max paca allocated */
#endif
li r5,0 /* logical cpu id */
1:
sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */
4 changes: 2 additions & 2 deletions arch/x86/kernel/smpboot.c
@@ -1316,7 +1316,7 @@ static void __init smp_sanity_check(void)
nr++;
}

nr_cpu_ids = 8;
set_nr_cpu_ids(8);
}
#endif

@@ -1569,7 +1569,7 @@ __init void prefill_possible_map(void)
possible = i;
}

nr_cpu_ids = possible;
set_nr_cpu_ids(possible);

pr_info("Allowing %d CPUs, %d hotplug CPUs\n",
possible, max_t(int, possible - num_processors, 0));
2 changes: 1 addition & 1 deletion arch/x86/xen/smp_pv.c
@@ -179,7 +179,7 @@ static void __init _get_smp_config(unsigned int early)
* hypercall to expand the max number of VCPUs an already
* running guest has. So cap it up to X. */
if (subtract)
nr_cpu_ids = nr_cpu_ids - subtract;
set_nr_cpu_ids(nr_cpu_ids - subtract);
#endif

}
4 changes: 2 additions & 2 deletions fs/ntfs3/bitmap.c
@@ -560,7 +560,7 @@ static int wnd_rescan(struct wnd_bitmap *wnd)

buf = (ulong *)bh->b_data;

used = __bitmap_weight(buf, wbits);
used = bitmap_weight(buf, wbits);
if (used < wbits) {
frb = wbits - used;
wnd->free_bits[iw] = frb;
@@ -1364,7 +1364,7 @@ int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits)
buf = (ulong *)bh->b_data;

__bitmap_clear(buf, b0, blocksize * 8 - b0);
frb = wbits - __bitmap_weight(buf, wbits);
frb = wbits - bitmap_weight(buf, wbits);
wnd->total_zeroes += frb - wnd->free_bits[iw];
wnd->free_bits[iw] = frb;

13 changes: 12 additions & 1 deletion include/linux/bitmap.h
@@ -51,6 +51,7 @@ struct device;
* bitmap_empty(src, nbits) Are all bits zero in *src?
* bitmap_full(src, nbits) Are all bits set in *src?
* bitmap_weight(src, nbits) Hamming Weight: number set bits
* bitmap_weight_and(src1, src2, nbits) Hamming Weight of and'ed bitmap
* bitmap_set(dst, pos, nbits) Set specified bit area
* bitmap_clear(dst, pos, nbits) Clear specified bit area
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
@@ -164,6 +165,8 @@ bool __bitmap_intersects(const unsigned long *bitmap1,
bool __bitmap_subset(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
unsigned int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
unsigned int __bitmap_weight_and(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
void __bitmap_set(unsigned long *map, unsigned int start, int len);
void __bitmap_clear(unsigned long *map, unsigned int start, int len);

@@ -222,7 +225,6 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n
#else
#define bitmap_copy_le bitmap_copy
#endif
unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits);
int bitmap_print_to_pagebuf(bool list, char *buf,
const unsigned long *maskp, int nmaskbits);

@@ -439,6 +441,15 @@ unsigned int bitmap_weight(const unsigned long *src, unsigned int nbits)
return __bitmap_weight(src, nbits);
}

static __always_inline
unsigned long bitmap_weight_and(const unsigned long *src1,
const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
return hweight_long(*src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits));
return __bitmap_weight_and(src1, src2, nbits);
}

static __always_inline void bitmap_set(unsigned long *map, unsigned int start,
unsigned int nbits)
{
19 changes: 19 additions & 0 deletions include/linux/bitops.h
@@ -247,6 +247,25 @@ static inline unsigned long __ffs64(u64 word)
return __ffs((unsigned long)word);
}

/**
* fns - find N'th set bit in a word
* @word: The word to search
* @n: Bit to find
*/
static inline unsigned long fns(unsigned long word, unsigned int n)
{
unsigned int bit;

while (word) {
bit = __ffs(word);
if (n-- == 0)
return bit;
__clear_bit(bit, &word);
}

return BITS_PER_LONG;
}

/**
* assign_bit - Assign value to a bit in memory
* @nr: the bit to set
132 changes: 97 additions & 35 deletions include/linux/cpumask.h
@@ -35,19 +35,23 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;
*/
#define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp)

#if NR_CPUS == 1
#define nr_cpu_ids 1U
#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
#define nr_cpu_ids ((unsigned int)NR_CPUS)
#else
extern unsigned int nr_cpu_ids;
#endif

#ifdef CONFIG_CPUMASK_OFFSTACK
/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also,
* not all bits may be allocated. */
#define nr_cpumask_bits nr_cpu_ids
static inline void set_nr_cpu_ids(unsigned int nr)
{
#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
WARN_ON(nr != nr_cpu_ids);
#else
#define nr_cpumask_bits ((unsigned int)NR_CPUS)
nr_cpu_ids = nr;
#endif
}

/* Deprecated. Always use nr_cpu_ids. */
#define nr_cpumask_bits nr_cpu_ids

/*
* The following particular system cpumasks and operations manage
@@ -67,10 +71,6 @@ extern unsigned int nr_cpu_ids;
* cpu_online_mask is the dynamic subset of cpu_present_mask,
* indicating those CPUs available for scheduling.
*
* If HOTPLUG is enabled, then cpu_possible_mask is forced to have
* all NR_CPUS bits set, otherwise it is just the set of CPUs that
* ACPI reports present at boot.
*
* If HOTPLUG is enabled, then cpu_present_mask varies dynamically,
* depending on what ACPI reports as currently plugged in, otherwise
* cpu_present_mask is just a copy of cpu_possible_mask.
@@ -174,9 +174,8 @@ static inline unsigned int cpumask_last(const struct cpumask *srcp)
static inline
unsigned int cpumask_next(int n, const struct cpumask *srcp)
{
/* -1 is a legal arg here. */
if (n != -1)
cpumask_check(n);
/* n is a prior cpu */
cpumask_check(n + 1);
return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n + 1);
}

@@ -189,9 +188,8 @@ unsigned int cpumask_next(int n, const struct cpumask *srcp)
*/
static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp)
{
/* -1 is a legal arg here. */
if (n != -1)
cpumask_check(n);
/* n is a prior cpu */
cpumask_check(n + 1);
return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1);
}

@@ -231,9 +229,8 @@ static inline
unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
const struct cpumask *src2p)
{
/* -1 is a legal arg here. */
if (n != -1)
cpumask_check(n);
/* n is a prior cpu */
cpumask_check(n + 1);
return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p),
nr_cpumask_bits, n + 1);
}
@@ -246,9 +243,7 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
* After the loop, cpu is >= nr_cpu_ids.
*/
#define for_each_cpu(cpu, mask) \
for ((cpu) = -1; \
(cpu) = cpumask_next((cpu), (mask)), \
(cpu) < nr_cpu_ids;)
for_each_set_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)

/**
* for_each_cpu_not - iterate over every cpu in a complemented mask
@@ -258,17 +253,15 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
* After the loop, cpu is >= nr_cpu_ids.
*/
#define for_each_cpu_not(cpu, mask) \
for ((cpu) = -1; \
(cpu) = cpumask_next_zero((cpu), (mask)), \
(cpu) < nr_cpu_ids;)
for_each_clear_bit(cpu, cpumask_bits(mask), nr_cpumask_bits)

#if NR_CPUS == 1
static inline
unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
{
cpumask_check(start);
if (n != -1)
cpumask_check(n);
/* n is a prior cpu */
cpumask_check(n + 1);

/*
* Return the first available CPU when wrapping, or when starting before cpu0,
@@ -293,10 +286,8 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
*
* After the loop, cpu is >= nr_cpu_ids.
*/
#define for_each_cpu_wrap(cpu, mask, start) \
for ((cpu) = cpumask_next_wrap((start)-1, (mask), (start), false); \
(cpu) < nr_cpumask_bits; \
(cpu) = cpumask_next_wrap((cpu), (mask), (start), true))
#define for_each_cpu_wrap(cpu, mask, start) \
for_each_set_bit_wrap(cpu, cpumask_bits(mask), nr_cpumask_bits, start)

/**
* for_each_cpu_and - iterate over every cpu in both masks
@@ -313,9 +304,25 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
* After the loop, cpu is >= nr_cpu_ids.
*/
#define for_each_cpu_and(cpu, mask1, mask2) \
for ((cpu) = -1; \
(cpu) = cpumask_next_and((cpu), (mask1), (mask2)), \
(cpu) < nr_cpu_ids;)
for_each_and_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)

/**
* for_each_cpu_andnot - iterate over every cpu present in one mask, excluding
* those present in another.
* @cpu: the (optionally unsigned) integer iterator
* @mask1: the first cpumask pointer
* @mask2: the second cpumask pointer
*
* This saves a temporary CPU mask in many places. It is equivalent to:
* struct cpumask tmp;
* cpumask_andnot(&tmp, &mask1, &mask2);
* for_each_cpu(cpu, &tmp)
* ...
*
* After the loop, cpu is >= nr_cpu_ids.
*/
#define for_each_cpu_andnot(cpu, mask1, mask2) \
for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), nr_cpumask_bits)

/**
* cpumask_any_but - return a "random" in a cpumask, but not this one.
@@ -337,6 +344,50 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu)
return i;
}

/**
* cpumask_nth - get the N'th cpu in a cpumask
* @srcp: the cpumask pointer
* @cpu: the N'th cpu to find, starting from 0
*
* Returns >= nr_cpu_ids if such cpu doesn't exist.
*/
static inline unsigned int cpumask_nth(unsigned int cpu, const struct cpumask *srcp)
{
return find_nth_bit(cpumask_bits(srcp), nr_cpumask_bits, cpumask_check(cpu));
}

/**
* cpumask_nth_and - get the N'th cpu in 2 cpumasks
* @srcp1: the cpumask pointer
* @srcp2: the cpumask pointer
* @cpu: the N'th cpu to find, starting from 0
*
* Returns >= nr_cpu_ids if such cpu doesn't exist.
*/
static inline
unsigned int cpumask_nth_and(unsigned int cpu, const struct cpumask *srcp1,
const struct cpumask *srcp2)
{
return find_nth_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
nr_cpumask_bits, cpumask_check(cpu));
}

/**
* cpumask_nth_andnot - get the N'th cpu set in 1st cpumask, and clear in 2nd.
* @srcp1: the cpumask pointer
* @srcp2: the cpumask pointer
* @cpu: the N'th cpu to find, starting from 0
*
* Returns >= nr_cpu_ids if such cpu doesn't exist.
*/
static inline
unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
const struct cpumask *srcp2)
{
return find_nth_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2),
nr_cpumask_bits, cpumask_check(cpu));
}

#define CPU_BITS_NONE \
{ \
[0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \
@@ -586,6 +637,17 @@ static inline unsigned int cpumask_weight(const struct cpumask *srcp)
return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits);
}

/**
* cpumask_weight_and - Count of bits in (*srcp1 & *srcp2)
* @srcp1: the cpumask to count bits (< nr_cpu_ids) in.
* @srcp2: the cpumask to count bits (< nr_cpu_ids) in.
*/
static inline unsigned int cpumask_weight_and(const struct cpumask *srcp1,
const struct cpumask *srcp2)
{
return bitmap_weight_and(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits);
}

/**
* cpumask_shift_right - *dstp = *srcp >> n
* @dstp: the cpumask result