Skip to content

Commit

Permalink
lib/sort: avoid indirect calls to built-in swap
Browse files Browse the repository at this point in the history
Similar to what's being done in the net code, this takes advantage of
the fact that most invocations use only a few common swap functions, and
replaces indirect calls to them with (highly predictable) conditional
branches.  (The downside, of course, is that if you *do* use a custom
swap function, there are a few extra predicted branches on the code
path.)

This actually *shrinks* the x86-64 code, because it inlines the various
swap functions inside do_swap, eliding function prologues & epilogues.

x86-64 code size 767 -> 703 bytes (-64)

Link: http://lkml.kernel.org/r/d10c5d4b393a1847f32f5b26f4bbaa2857140e1e.1552704200.git.lkml@sdf.org
Signed-off-by: George Spelvin <[email protected]>
Acked-by: Andrey Abramov <[email protected]>
Acked-by: Rasmus Villemoes <[email protected]>
Reviewed-by: Andy Shevchenko <[email protected]>
Cc: Daniel Wagner <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Don Mullis <[email protected]>
Cc: Geert Uytterhoeven <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
George Spelvin authored and torvalds committed May 15, 2019
1 parent 22a241c commit 8fb583c
Showing 1 changed file with 36 additions and 15 deletions.
51 changes: 36 additions & 15 deletions lib/sort.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,8 @@ static bool is_aligned(const void *base, size_t size, unsigned char align)
* subtract (since the intervening mov instructions don't alter the flags).
* Gcc 8.1.0 doesn't have that problem.
*/
static void swap_words_32(void *a, void *b, int size)
static void swap_words_32(void *a, void *b, size_t n)
{
size_t n = (unsigned int)size;

do {
u32 t = *(u32 *)(a + (n -= 4));
*(u32 *)(a + n) = *(u32 *)(b + n);
Expand All @@ -80,10 +78,8 @@ static void swap_words_32(void *a, void *b, int size)
* but it's possible to have 64-bit loads without 64-bit pointers (e.g.
* x32 ABI). Are there any cases the kernel needs to worry about?
*/
static void swap_words_64(void *a, void *b, int size)
static void swap_words_64(void *a, void *b, size_t n)
{
size_t n = (unsigned int)size;

do {
#ifdef CONFIG_64BIT
u64 t = *(u64 *)(a + (n -= 8));
Expand All @@ -109,17 +105,42 @@ static void swap_words_64(void *a, void *b, int size)
*
* This is the fallback if alignment doesn't allow using larger chunks.
*/
/*
 * NOTE(review): this span is a unified-diff rendering without +/- markers;
 * the first signature line and the "size_t n = (unsigned int)size;" line
 * are the pre-patch (removed) lines, the "size_t n" signature is the new one.
 *
 * swap_bytes - swap the n bytes at a with the n bytes at b, one byte at
 * a time, walking from the end of the buffers toward the start.
 */
static void swap_bytes(void *a, void *b, int size)
static void swap_bytes(void *a, void *b, size_t n)
{
size_t n = (unsigned int)size;

do {
char t = ((char *)a)[--n];	/* save byte from a */
((char *)a)[n] = ((char *)b)[n];	/* copy b's byte into a */
((char *)b)[n] = t;	/* complete the swap */
} while (n);	/* n counts down to 0; caller guarantees n > 0 */
}

/* Signature of a caller-supplied swap callback (legacy int-sized length). */
typedef void (*swap_func_t)(void *a, void *b, int size);

/*
 * The values are arbitrary as long as they can't be confused with
 * a pointer, but small integers make for the smallest compare
 * instructions.
 */
#define SWAP_WORDS_64 (swap_func_t)0
#define SWAP_WORDS_32 (swap_func_t)1
#define SWAP_BYTES (swap_func_t)2

/*
 * do_swap - dispatch one element swap to the right implementation.
 *
 * The three built-in swappers are selected by sentinel values compared
 * with direct (highly predictable) branches, avoiding an indirect call;
 * anything else is a genuine caller-supplied function pointer.  The
 * function pointer is the last parameter so a tail call stays cheap if
 * the compiler chooses not to inline this.
 */
static void do_swap(void *a, void *b, size_t size, swap_func_t swap_func)
{
	if (swap_func == SWAP_WORDS_64) {
		swap_words_64(a, b, size);
		return;
	}
	if (swap_func == SWAP_WORDS_32) {
		swap_words_32(a, b, size);
		return;
	}
	if (swap_func == SWAP_BYTES) {
		swap_bytes(a, b, size);
		return;
	}
	/* Custom callback keeps the legacy int-sized length. */
	swap_func(a, b, (int)size);
}

/**
* parent - given the offset of the child, find the offset of the parent.
* @i: the offset of the heap element whose parent is sought. Non-zero.
Expand Down Expand Up @@ -157,7 +178,7 @@ static size_t parent(size_t i, unsigned int lsbit, size_t size)
* This function does a heapsort on the given array. You may provide
* a swap_func function if you need to do something more than a memory
* copy (e.g. fix up pointers or auxiliary data), but the built-in swap
* isn't usually a bottleneck.
* avoids a slow retpoline and so is significantly faster.
*
* Sorting time is O(n log n) both on average and worst-case. While
* quicksort is slightly faster on average, it suffers from exploitable
Expand All @@ -177,11 +198,11 @@ void sort(void *base, size_t num, size_t size,

if (!swap_func) {
if (is_aligned(base, size, 8))
swap_func = swap_words_64;
swap_func = SWAP_WORDS_64;
else if (is_aligned(base, size, 4))
swap_func = swap_words_32;
swap_func = SWAP_WORDS_32;
else
swap_func = swap_bytes;
swap_func = SWAP_BYTES;
}

/*
Expand All @@ -197,7 +218,7 @@ void sort(void *base, size_t num, size_t size,
if (a) /* Building heap: sift down --a */
a -= size;
else if (n -= size) /* Sorting: Extract root to --n */
swap_func(base, base + n, size);
do_swap(base, base + n, size, swap_func);
else /* Sort complete */
break;

Expand All @@ -224,7 +245,7 @@ void sort(void *base, size_t num, size_t size,
c = b; /* Where "a" belongs */
while (b != a) { /* Shift it into place */
b = parent(b, lsbit, size);
swap_func(base + b, base + c, size);
do_swap(base + b, base + c, size, swap_func);
}
}
}
Expand Down

0 comments on commit 8fb583c

Please sign in to comment.