Skip to content

Commit

Permalink
Merge patch series "RISC-V: mm: Make SV48 the default address space"
Browse files Browse the repository at this point in the history
Charlie Jenkins <[email protected]> says:

Make sv48 the default address space for mmap as some applications
currently depend on this assumption. Users can now select a
desired address space using a non-zero hint address to mmap. Previously,
requesting the default address space from mmap by passing zero as the hint
address would result in using the largest address space possible. Some
applications depend on empty bits in the virtual address space, like Go and
Java, so this patch provides more flexibility for application developers.

* b4-shazam-merge:
  RISC-V: mm: Document mmap changes
  RISC-V: mm: Update pgtable comment documentation
  RISC-V: mm: Add tests for RISC-V mm
  RISC-V: mm: Restrict address space for sv39,sv48,sv57

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
  • Loading branch information
palmer-dabbelt committed Aug 31, 2023
2 parents 52b77c2 + 7998abe commit 94f0038
Show file tree
Hide file tree
Showing 11 changed files with 261 additions and 13 deletions.
22 changes: 22 additions & 0 deletions Documentation/riscv/vm-layout.rst
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,25 @@ RISC-V Linux Kernel SV57
ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
__________________|____________|__________________|_________|____________________________________________________________


Userspace VAs
--------------------
To maintain compatibility with software that relies on the VA space with a
maximum of 48 bits the kernel will, by default, return virtual addresses to
userspace from a 48-bit range (sv48). This default behavior is achieved by
passing 0 into the hint address parameter of mmap. On CPUs with an address space
smaller than sv48, the CPU maximum supported address space will be the default.

Software can "opt-in" to receiving VAs from another VA space by providing
a hint address to mmap. A hint address passed to mmap will cause the largest
address space that fits entirely into the hint to be used, unless there is no
space left in the address space. If there is no space available in the requested
address space, an address in the next smallest available address space will be
returned.

For example, in order to obtain 48-bit VA space, a hint address greater than
:code:`1 << 47` must be provided. Note that this is 47 due to sv48 userspace
ending at :code:`1 << 47` and the addresses beyond this are reserved for the
kernel. Similarly, to obtain 57-bit VA space addresses, a hint address greater
than or equal to :code:`1 << 56` must be provided.
2 changes: 1 addition & 1 deletion arch/riscv/include/asm/elf.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ extern bool compat_elf_check_arch(Elf32_Ehdr *hdr);
* the loader. We need to make sure that it is out of the way of the program
* that it will "exec", and that there is sufficient room for the brk.
*/
#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)
#define ELF_ET_DYN_BASE ((DEFAULT_MAP_WINDOW / 3) * 2)

#ifdef CONFIG_64BIT
#ifdef CONFIG_COMPAT
Expand Down
33 changes: 28 additions & 5 deletions arch/riscv/include/asm/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,16 @@
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
#define VA_BITS_SV32 32
#ifdef CONFIG_64BIT
#define VA_BITS_SV39 39
#define VA_BITS_SV48 48
#define VA_BITS_SV57 57

#define VA_BITS (pgtable_l5_enabled ? \
57 : (pgtable_l4_enabled ? 48 : 39))
VA_BITS_SV57 : (pgtable_l4_enabled ? VA_BITS_SV48 : VA_BITS_SV39))
#else
#define VA_BITS 32
#define VA_BITS VA_BITS_SV32
#endif

#define VMEMMAP_SHIFT \
Expand Down Expand Up @@ -111,11 +116,27 @@
#include <asm/page.h>
#include <asm/tlbflush.h>
#include <linux/mm_types.h>
#include <asm/compat.h>

#define __page_val_to_pfn(_val) (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT)

#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>

#define VA_USER_SV39 (UL(1) << (VA_BITS_SV39 - 1))
#define VA_USER_SV48 (UL(1) << (VA_BITS_SV48 - 1))
#define VA_USER_SV57 (UL(1) << (VA_BITS_SV57 - 1))

#ifdef CONFIG_COMPAT
#define MMAP_VA_BITS_64 ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
#define MMAP_MIN_VA_BITS_64 (VA_BITS_SV39)
#define MMAP_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_VA_BITS_64)
#define MMAP_MIN_VA_BITS (is_compat_task() ? VA_BITS_SV32 : MMAP_MIN_VA_BITS_64)
#else
#define MMAP_VA_BITS ((VA_BITS >= VA_BITS_SV48) ? VA_BITS_SV48 : VA_BITS)
#define MMAP_MIN_VA_BITS (VA_BITS_SV39)
#endif /* CONFIG_COMPAT */

#else
#include <asm/pgtable-32.h>
#endif /* CONFIG_64BIT */
Expand Down Expand Up @@ -830,14 +851,16 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte)
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
* Task size is:
* - 0x9fc00000 (~2.5GB) for RV32.
* - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
* - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
* - 0x9fc00000 (~2.5GB) for RV32.
* - 0x4000000000 ( 256GB) for RV64 using SV39 mmu
* - 0x800000000000 ( 128TB) for RV64 using SV48 mmu
* - 0x100000000000000 ( 64PB) for RV64 using SV57 mmu
*
* Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V
* Instruction Set Manual Volume II: Privileged Architecture" states that
* "load and store effective addresses, which are 64bits, must have bits
* 63–48 all equal to bit 47, or else a page-fault exception will occur."
* Similarly for SV57, bits 63–57 must be equal to bit 56.
*/
#ifdef CONFIG_64BIT
#define TASK_SIZE_64 (PGDIR_SIZE * PTRS_PER_PGD / 2)
Expand Down
52 changes: 46 additions & 6 deletions arch/riscv/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,59 @@

#include <asm/ptrace.h>

#ifdef CONFIG_64BIT
#define DEFAULT_MAP_WINDOW (UL(1) << (MMAP_VA_BITS - 1))
#define STACK_TOP_MAX TASK_SIZE_64

#define arch_get_mmap_end(addr, len, flags) \
({ \
unsigned long mmap_end; \
typeof(addr) _addr = (addr); \
if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \
mmap_end = STACK_TOP_MAX; \
else if ((_addr) >= VA_USER_SV57) \
mmap_end = STACK_TOP_MAX; \
else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
mmap_end = VA_USER_SV48; \
else \
mmap_end = VA_USER_SV39; \
mmap_end; \
})

#define arch_get_mmap_base(addr, base) \
({ \
unsigned long mmap_base; \
typeof(addr) _addr = (addr); \
typeof(base) _base = (base); \
unsigned long rnd_gap = DEFAULT_MAP_WINDOW - (_base); \
if ((_addr) == 0 || (IS_ENABLED(CONFIG_COMPAT) && is_compat_task())) \
mmap_base = (_base); \
else if (((_addr) >= VA_USER_SV57) && (VA_BITS >= VA_BITS_SV57)) \
mmap_base = VA_USER_SV57 - rnd_gap; \
else if ((((_addr) >= VA_USER_SV48)) && (VA_BITS >= VA_BITS_SV48)) \
mmap_base = VA_USER_SV48 - rnd_gap; \
else \
mmap_base = VA_USER_SV39 - rnd_gap; \
mmap_base; \
})

#else
#define DEFAULT_MAP_WINDOW TASK_SIZE
#define STACK_TOP_MAX TASK_SIZE
#endif
#define STACK_ALIGN 16

#define STACK_TOP DEFAULT_MAP_WINDOW

/*
* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)

#define STACK_TOP TASK_SIZE
#ifdef CONFIG_64BIT
#define STACK_TOP_MAX TASK_SIZE_64
#define TASK_UNMAPPED_BASE PAGE_ALIGN((UL(1) << MMAP_MIN_VA_BITS) / 3)
#else
#define STACK_TOP_MAX TASK_SIZE
#define TASK_UNMAPPED_BASE PAGE_ALIGN(TASK_SIZE / 3)
#endif
#define STACK_ALIGN 16

#ifndef __ASSEMBLY__

Expand Down
2 changes: 1 addition & 1 deletion tools/testing/selftests/riscv/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)

ifneq (,$(filter $(ARCH),riscv))
RISCV_SUBTARGETS ?= hwprobe vector
RISCV_SUBTARGETS ?= hwprobe vector mm
else
RISCV_SUBTARGETS :=
endif
Expand Down
2 changes: 2 additions & 0 deletions tools/testing/selftests/riscv/mm/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
mmap_bottomup
mmap_default
15 changes: 15 additions & 0 deletions tools/testing/selftests/riscv/mm/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2021 ARM Limited
# Originally tools/testing/arm64/abi/Makefile

# Additional include paths needed by kselftest.h and local headers
CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.

TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup

TEST_PROGS := testcases/run_mmap.sh

include ../../lib.mk

$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_tests.h
$(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
35 changes: 35 additions & 0 deletions tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <sys/mman.h>
#include <testcases/mmap_test.h>

#include "../../kselftest_harness.h"

TEST(infinite_rlimit)
{
// Only works on 64 bit
#if __riscv_xlen == 64
struct addresses mmap_addresses;

EXPECT_EQ(BOTTOM_UP, memory_layout());

do_mmaps(&mmap_addresses);

EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr);

EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint);
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr);
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr);
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr);
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr);
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr);
EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr);
#endif
}

TEST_HARNESS_MAIN
35 changes: 35 additions & 0 deletions tools/testing/selftests/riscv/mm/testcases/mmap_default.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <sys/mman.h>
#include <testcases/mmap_test.h>

#include "../../kselftest_harness.h"

TEST(default_rlimit)
{
// Only works on 64 bit
#if __riscv_xlen == 64
struct addresses mmap_addresses;

EXPECT_EQ(TOP_DOWN, memory_layout());

do_mmaps(&mmap_addresses);

EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr);
EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr);

EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint);
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr);
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr);
EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr);
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr);
EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr);
EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr);
#endif
}

TEST_HARNESS_MAIN
64 changes: 64 additions & 0 deletions tools/testing/selftests/riscv/mm/testcases/mmap_test.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _TESTCASES_MMAP_TEST_H
#define _TESTCASES_MMAP_TEST_H
#include <sys/mman.h>
#include <sys/resource.h>
#include <stddef.h>

#define TOP_DOWN 0
#define BOTTOM_UP 1

struct addresses {
int *no_hint;
int *on_37_addr;
int *on_38_addr;
int *on_46_addr;
int *on_47_addr;
int *on_55_addr;
int *on_56_addr;
};

static inline void do_mmaps(struct addresses *mmap_addresses)
{
/*
* Place all of the hint addresses on the boundaries of mmap
* sv39, sv48, sv57
* User addresses end at 1<<38, 1<<47, 1<<56 respectively
*/
void *on_37_bits = (void *)(1UL << 37);
void *on_38_bits = (void *)(1UL << 38);
void *on_46_bits = (void *)(1UL << 46);
void *on_47_bits = (void *)(1UL << 47);
void *on_55_bits = (void *)(1UL << 55);
void *on_56_bits = (void *)(1UL << 56);

int prot = PROT_READ | PROT_WRITE;
int flags = MAP_PRIVATE | MAP_ANONYMOUS;

mmap_addresses->no_hint =
mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0);
mmap_addresses->on_37_addr =
mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0);
mmap_addresses->on_38_addr =
mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0);
mmap_addresses->on_46_addr =
mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0);
mmap_addresses->on_47_addr =
mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0);
mmap_addresses->on_55_addr =
mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0);
mmap_addresses->on_56_addr =
mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0);
}

static inline int memory_layout(void)
{
int prot = PROT_READ | PROT_WRITE;
int flags = MAP_PRIVATE | MAP_ANONYMOUS;

void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0);
void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0);

return value2 > value1;
}
#endif /* _TESTCASES_MMAP_TEST_H */
12 changes: 12 additions & 0 deletions tools/testing/selftests/riscv/mm/testcases/run_mmap.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0

original_stack_limit=$(ulimit -s)

./mmap_default

# Force mmap_bottomup to be ran with bottomup memory due to
# the unlimited stack
ulimit -s unlimited
./mmap_bottomup
ulimit -s $original_stack_limit

0 comments on commit 94f0038

Please sign in to comment.