Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Catalin Marinas:

 - Set the minimum GCC version to 5.1 for arm64 due to earlier compiler
   bugs.

 - Make atomic helpers __always_inline to avoid a section mismatch when
   compiling with clang.

 - Fix the CMA and crashkernel reservations to use ZONE_DMA (remove the
   arm64_dma32_phys_limit variable, no longer needed with a dynamic
   ZONE_DMA sizing in 5.11).

 - Remove redundant IRQ flag tracing that was leaving lockdep
   inconsistent with the hardware state.

 - Revert perf events based hard lockup detector that was causing
   smp_processor_id() to be called in preemptible context.

 - Some trivial cleanups - spelling fix, renaming S_FRAME_SIZE to
   PT_REGS_SIZE, function prototypes added.
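
   As background for the first fix above: the kernel derives a GCC_VERSION
   value from the compiler's predefined macros, and raising the arm64 floor
   to 5.1 amounts to a preprocessor gate along the following lines. This is
   an illustrative sketch only (the compiler.h hunk itself is not among the
   files shown below), not the exact upstream wording:

   /* Sketch: enforcing a per-architecture minimum compiler version. */
   #define GCC_VERSION (__GNUC__ * 10000		\
   		     + __GNUC_MINOR__ * 100	\
   		     + __GNUC_PATCHLEVEL__)

   #if defined(CONFIG_ARM64) && !defined(__clang__) && GCC_VERSION < 50100
   #error Sorry, your version of GCC is too old - GCC >= 5.1 is required on arm64.
   #endif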

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: selftests: Fix spelling of 'Mismatch'
  arm64: syscall: include prototype for EL0 SVC functions
  compiler.h: Raise minimum version of GCC to 5.1 for arm64
  arm64: make atomic helpers __always_inline
  arm64: rename S_FRAME_SIZE to PT_REGS_SIZE
  Revert "arm64: Enable perf events based hard lockup detector"
  arm64: entry: remove redundant IRQ flag tracing
  arm64: Remove arm64_dma32_phys_limit and its uses
torvalds committed Jan 15, 2021
2 parents f288c89 + 3a57a64 commit 82821be
Showing 17 changed files with 56 additions and 107 deletions.
2 changes: 0 additions & 2 deletions arch/arm64/Kconfig
@@ -174,8 +174,6 @@ config ARM64
 	select HAVE_NMI
 	select HAVE_PATA_PLATFORM
 	select HAVE_PERF_EVENTS
-	select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS
-	select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_REGS_AND_STACK_ACCESS_API
10 changes: 5 additions & 5 deletions arch/arm64/include/asm/atomic.h
@@ -17,7 +17,7 @@
 #include <asm/lse.h>
 
 #define ATOMIC_OP(op)							\
-static inline void arch_##op(int i, atomic_t *v)			\
+static __always_inline void arch_##op(int i, atomic_t *v)		\
 {									\
 	__lse_ll_sc_body(op, i, v);					\
 }
@@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub)
 #undef ATOMIC_OP
 
 #define ATOMIC_FETCH_OP(name, op)					\
-static inline int arch_##op##name(int i, atomic_t *v)			\
+static __always_inline int arch_##op##name(int i, atomic_t *v)		\
 {									\
 	return __lse_ll_sc_body(op##name, i, v);			\
 }
@@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return)
 #undef ATOMIC_FETCH_OPS
 
 #define ATOMIC64_OP(op)							\
-static inline void arch_##op(long i, atomic64_t *v)			\
+static __always_inline void arch_##op(long i, atomic64_t *v)		\
 {									\
 	__lse_ll_sc_body(op, i, v);					\
 }
@@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub)
 #undef ATOMIC64_OP
 
 #define ATOMIC64_FETCH_OP(name, op)					\
-static inline long arch_##op##name(long i, atomic64_t *v)		\
+static __always_inline long arch_##op##name(long i, atomic64_t *v)	\
 {									\
 	return __lse_ll_sc_body(op##name, i, v);			\
 }
@@ -94,7 +94,7 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return)
 #undef ATOMIC64_FETCH_OP
 #undef ATOMIC64_FETCH_OPS
 
-static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
+static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	return __lse_ll_sc_body(atomic64_dec_if_positive, v);
 }
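
To see what the hunk above changes in practice, here is roughly what one instantiation expands to afterwards — a sketch of the ATOMIC_OP(atomic_add) expansion for illustration, not a verbatim preprocessor dump:

/* ATOMIC_OP(atomic_add) now yields, approximately: */
static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
	__lse_ll_sc_body(atomic_add, i, v);	/* dispatches to the LSE or LL/SC variant */
}

Forcing the inline matters because a compiler that declines to inline the helper (clang, in the reported case) emits an out-of-line copy, and that copy can end up referencing init-section data from ordinary text, which modpost then reports as a section mismatch.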
3 changes: 1 addition & 2 deletions arch/arm64/include/asm/processor.h
@@ -94,8 +94,7 @@
 #endif /* CONFIG_ARM64_FORCE_52BIT */
 
 extern phys_addr_t arm64_dma_phys_limit;
-extern phys_addr_t arm64_dma32_phys_limit;
-#define ARCH_LOW_ADDRESS_LIMIT	((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1)
+#define ARCH_LOW_ADDRESS_LIMIT	(arm64_dma_phys_limit - 1)
 
 struct debug_info {
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
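
A note on the removed fallback: `(a ? : b)` is the GNU C conditional with the middle operand omitted, i.e. "a if a is non-zero, otherwise b", so the old ARCH_LOW_ADDRESS_LIMIT fell back to the DMA32 limit whenever arm64_dma_phys_limit was still zero. Since the mm/init.c change below now always initialises arm64_dma_phys_limit, the fallback can go. A minimal illustration of the extension (not kernel code):

/* GNU C: x ? : y is equivalent to x ? x : y, with x evaluated only once. */
static unsigned long low_address_limit(unsigned long dma, unsigned long dma32)
{
	return (dma ? : dma32) - 1;	/* the old ARCH_LOW_ADDRESS_LIMIT logic */
}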
2 changes: 1 addition & 1 deletion arch/arm64/kernel/asm-offsets.c
@@ -75,7 +75,7 @@ int main(void)
   DEFINE(S_SDEI_TTBR1,		offsetof(struct pt_regs, sdei_ttbr1));
   DEFINE(S_PMR_SAVE,		offsetof(struct pt_regs, pmr_save));
   DEFINE(S_STACKFRAME,		offsetof(struct pt_regs, stackframe));
-  DEFINE(S_FRAME_SIZE,		sizeof(struct pt_regs));
+  DEFINE(PT_REGS_SIZE,		sizeof(struct pt_regs));
   BLANK();
 #ifdef CONFIG_COMPAT
   DEFINE(COMPAT_SIGFRAME_REGS_OFFSET,	offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
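
The rename here is what propagates to the assembly files below: asm-offsets.c is never linked into the kernel, it is compiled only so that each DEFINE() emits a marker that kbuild rewrites into a #define in the generated asm-offsets.h. Roughly, the helper behind it looks like this (a sketch from memory of include/linux/kbuild.h; the exact quoting may differ):

/*
 * DEFINE(sym, val) embeds "->sym <val>" as an .ascii directive in the
 * compiler's assembly output; a kbuild sed script then turns each marker
 * into "#define sym val" in include/generated/asm-offsets.h.
 */
#define DEFINE(sym, val) \
	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))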
12 changes: 6 additions & 6 deletions arch/arm64/kernel/entry-ftrace.S
@@ -35,7 +35,7 @@
  */
 	.macro ftrace_regs_entry, allregs=0
 	/* Make room for pt_regs, plus a callee frame */
-	sub sp, sp, #(S_FRAME_SIZE + 16)
+	sub sp, sp, #(PT_REGS_SIZE + 16)
 
 	/* Save function arguments (and x9 for simplicity) */
 	stp x0, x1, [sp, #S_X0]
@@ -61,15 +61,15 @@
 	.endif
 
 	/* Save the callsite's SP and LR */
-	add x10, sp, #(S_FRAME_SIZE + 16)
+	add x10, sp, #(PT_REGS_SIZE + 16)
 	stp x9, x10, [sp, #S_LR]
 
 	/* Save the PC after the ftrace callsite */
 	str x30, [sp, #S_PC]
 
 	/* Create a frame record for the callsite above pt_regs */
-	stp x29, x9, [sp, #S_FRAME_SIZE]
-	add x29, sp, #S_FRAME_SIZE
+	stp x29, x9, [sp, #PT_REGS_SIZE]
+	add x29, sp, #PT_REGS_SIZE
 
 	/* Create our frame record within pt_regs. */
 	stp x29, x30, [sp, #S_STACKFRAME]
@@ -120,7 +120,7 @@ ftrace_common_return:
 	ldr x9, [sp, #S_PC]
 
 	/* Restore the callsite's SP */
-	add sp, sp, #S_FRAME_SIZE + 16
+	add sp, sp, #PT_REGS_SIZE + 16
 
 	ret x9
 SYM_CODE_END(ftrace_common)
@@ -130,7 +130,7 @@ SYM_CODE_START(ftrace_graph_caller)
 	ldr x0, [sp, #S_PC]
 	sub x0, x0, #AARCH64_INSN_SIZE		// ip (callsite's BL insn)
 	add x1, sp, #S_LR			// parent_ip (callsite's LR)
-	ldr x2, [sp, #S_FRAME_SIZE]		// parent fp (callsite's FP)
+	ldr x2, [sp, #PT_REGS_SIZE]		// parent fp (callsite's FP)
 	bl prepare_ftrace_return
 	b ftrace_common_return
 SYM_CODE_END(ftrace_graph_caller)
14 changes: 7 additions & 7 deletions arch/arm64/kernel/entry.S
@@ -75,7 +75,7 @@ alternative_else_nop_endif
 	.endif
 #endif
 
-	sub sp, sp, #S_FRAME_SIZE
+	sub sp, sp, #PT_REGS_SIZE
 #ifdef CONFIG_VMAP_STACK
 	/*
 	 * Test whether the SP has overflowed, without corrupting a GPR.
@@ -96,7 +96,7 @@ alternative_else_nop_endif
 	 * userspace, and can clobber EL0 registers to free up GPRs.
 	 */
 
-	/* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */
+	/* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */
 	msr tpidr_el0, x0
 
 	/* Recover the original x0 value and stash it in tpidrro_el0 */
@@ -253,7 +253,7 @@ alternative_else_nop_endif
 
 	scs_load tsk, x20
 	.else
-	add x21, sp, #S_FRAME_SIZE
+	add x21, sp, #PT_REGS_SIZE
 	get_current_task tsk
 	.endif /* \el == 0 */
 	mrs x22, elr_el1
@@ -377,7 +377,7 @@ alternative_else_nop_endif
 	ldp x26, x27, [sp, #16 * 13]
 	ldp x28, x29, [sp, #16 * 14]
 	ldr lr, [sp, #S_LR]
-	add sp, sp, #S_FRAME_SIZE		// restore sp
+	add sp, sp, #PT_REGS_SIZE		// restore sp
 
 	.if \el == 0
 	alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
@@ -580,12 +580,12 @@ __bad_stack:
 
 	/*
 	 * Store the original GPRs to the new stack. The orginal SP (minus
-	 * S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry.
+	 * PT_REGS_SIZE) was stashed in tpidr_el0 by kernel_ventry.
 	 */
-	sub sp, sp, #S_FRAME_SIZE
+	sub sp, sp, #PT_REGS_SIZE
 	kernel_entry 1
 	mrs x0, tpidr_el0
-	add x0, x0, #S_FRAME_SIZE
+	add x0, x0, #PT_REGS_SIZE
 	str x0, [sp, #S_SP]
 
 	/* Stash the regs for handle_bad_stack */
41 changes: 2 additions & 39 deletions arch/arm64/kernel/perf_event.c
@@ -23,8 +23,6 @@
 #include <linux/platform_device.h>
 #include <linux/sched_clock.h>
 #include <linux/smp.h>
-#include <linux/nmi.h>
-#include <linux/cpufreq.h>
 
 /* ARMv8 Cortex-A53 specific event types. */
 #define ARMV8_A53_PERFCTR_PREF_LINEFILL				0xC2
@@ -1250,21 +1248,10 @@ static struct platform_driver armv8_pmu_driver = {
 
 static int __init armv8_pmu_driver_init(void)
 {
-	int ret;
-
 	if (acpi_disabled)
-		ret = platform_driver_register(&armv8_pmu_driver);
+		return platform_driver_register(&armv8_pmu_driver);
 	else
-		ret = arm_pmu_acpi_probe(armv8_pmuv3_init);
-
-	/*
-	 * Try to re-initialize lockup detector after PMU init in
-	 * case PMU events are triggered via NMIs.
-	 */
-	if (ret == 0 && arm_pmu_irq_is_nmi())
-		lockup_detector_init();
-
-	return ret;
+		return arm_pmu_acpi_probe(armv8_pmuv3_init);
 }
 device_initcall(armv8_pmu_driver_init)
 
@@ -1322,27 +1309,3 @@ void arch_perf_update_userpage(struct perf_event *event,
 	userpg->cap_user_time_zero = 1;
 	userpg->cap_user_time_short = 1;
 }
-
-#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
-/*
- * Safe maximum CPU frequency in case a particular platform doesn't implement
- * cpufreq driver. Although, architecture doesn't put any restrictions on
- * maximum frequency but 5 GHz seems to be safe maximum given the available
- * Arm CPUs in the market which are clocked much less than 5 GHz. On the other
- * hand, we can't make it much higher as it would lead to a large hard-lockup
- * detection timeout on parts which are running slower (eg. 1GHz on
- * Developerbox) and doesn't possess a cpufreq driver.
- */
-#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz
-u64 hw_nmi_get_sample_period(int watchdog_thresh)
-{
-	unsigned int cpu = smp_processor_id();
-	unsigned long max_cpu_freq;
-
-	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
-	if (!max_cpu_freq)
-		max_cpu_freq = SAFE_MAX_CPU_FREQ;
-
-	return (u64)max_cpu_freq * watchdog_thresh;
-}
-#endif
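
The block deleted above is the other half of the hard-lockup revert mentioned in the merge message: hw_nmi_get_sample_period() calls smp_processor_id(), but the lockup detector can reach it from preemptible context, which CONFIG_DEBUG_PREEMPT rightly complains about. A generic sketch of that pitfall and the usual remedy — illustrative only; this series simply reverts the feature rather than fixing it this way, and cpufreq_get_hw_max_freq() is reused from the deleted code above:

/* Sketch: reading per-CPU state without tripping the preemption check. */
static u64 sample_period_sketch(int watchdog_thresh)
{
	unsigned int cpu;
	u64 max_cpu_freq;

	cpu = get_cpu();				/* disables preemption */
	max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
	put_cpu();					/* re-enables preemption */

	if (!max_cpu_freq)
		max_cpu_freq = 5000000000UL;		/* 5 GHz fallback, as above */

	return max_cpu_freq * watchdog_thresh;
}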
6 changes: 3 additions & 3 deletions arch/arm64/kernel/probes/kprobes_trampoline.S
@@ -25,7 +25,7 @@
 	stp x24, x25, [sp, #S_X24]
 	stp x26, x27, [sp, #S_X26]
 	stp x28, x29, [sp, #S_X28]
-	add x0, sp, #S_FRAME_SIZE
+	add x0, sp, #PT_REGS_SIZE
 	stp lr, x0, [sp, #S_LR]
 	/*
 	 * Construct a useful saved PSTATE
@@ -62,7 +62,7 @@
 	.endm
 
 SYM_CODE_START(kretprobe_trampoline)
-	sub sp, sp, #S_FRAME_SIZE
+	sub sp, sp, #PT_REGS_SIZE
 
 	save_all_base_regs
 
@@ -76,7 +76,7 @@ SYM_CODE_START(kretprobe_trampoline)
 
 	restore_all_base_regs
 
-	add sp, sp, #S_FRAME_SIZE
+	add sp, sp, #PT_REGS_SIZE
 	ret
 
 SYM_CODE_END(kretprobe_trampoline)
7 changes: 0 additions & 7 deletions arch/arm64/kernel/signal.c
@@ -914,13 +914,6 @@ static void do_signal(struct pt_regs *regs)
 asmlinkage void do_notify_resume(struct pt_regs *regs,
 				 unsigned long thread_flags)
 {
-	/*
-	 * The assembly code enters us with IRQs off, but it hasn't
-	 * informed the tracing code of that for efficiency reasons.
-	 * Update the trace code with the current status.
-	 */
-	trace_hardirqs_off();
-
 	do {
 		if (thread_flags & _TIF_NEED_RESCHED) {
 			/* Unmask Debug and SError for the next task */
10 changes: 2 additions & 8 deletions arch/arm64/kernel/syscall.c
@@ -9,6 +9,7 @@
 
 #include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
+#include <asm/exception.h>
 #include <asm/fpsimd.h>
 #include <asm/syscall.h>
 #include <asm/thread_info.h>
@@ -165,15 +166,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 	if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
 		local_daif_mask();
 		flags = current_thread_info()->flags;
-		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) {
-			/*
-			 * We're off to userspace, where interrupts are
-			 * always enabled after we restore the flags from
-			 * the SPSR.
-			 */
-			trace_hardirqs_on();
+		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
 			return;
-		}
 		local_daif_restore(DAIF_PROCCTX);
 	}
 
33 changes: 18 additions & 15 deletions arch/arm64/mm/init.c
@@ -53,13 +53,13 @@ s64 memstart_addr __ro_after_init = -1;
 EXPORT_SYMBOL(memstart_addr);
 
 /*
- * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
- * memory as some devices, namely the Raspberry Pi 4, have peripherals with
- * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
- * bit addressable memory area.
+ * If the corresponding config options are enabled, we create both ZONE_DMA
+ * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory
+ * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
+ * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
+ * otherwise it is empty.
  */
 phys_addr_t arm64_dma_phys_limit __ro_after_init;
-phys_addr_t arm64_dma32_phys_limit __ro_after_init;
 
 #ifdef CONFIG_KEXEC_CORE
 /*
@@ -84,7 +84,7 @@ static void __init reserve_crashkernel(void)
 
 	if (crash_base == 0) {
 		/* Current arm64 boot protocol requires 2MB alignment */
-		crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit,
+		crash_base = memblock_find_in_range(0, arm64_dma_phys_limit,
 				crash_size, SZ_2M);
 		if (crash_base == 0) {
 			pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@@ -196,6 +196,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 	unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
 	unsigned int __maybe_unused acpi_zone_dma_bits;
 	unsigned int __maybe_unused dt_zone_dma_bits;
+	phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32);
 
 #ifdef CONFIG_ZONE_DMA
 	acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
@@ -205,8 +206,12 @@
 	max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
 #endif
 #ifdef CONFIG_ZONE_DMA32
-	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
+	max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
+	if (!arm64_dma_phys_limit)
+		arm64_dma_phys_limit = dma32_phys_limit;
 #endif
+	if (!arm64_dma_phys_limit)
+		arm64_dma_phys_limit = PHYS_MASK + 1;
 	max_zone_pfns[ZONE_NORMAL] = max;
 
 	free_area_init(max_zone_pfns);
@@ -394,16 +399,9 @@ void __init arm64_memblock_init(void)
 
 	early_init_fdt_scan_reserved_mem();
 
-	if (IS_ENABLED(CONFIG_ZONE_DMA32))
-		arm64_dma32_phys_limit = max_zone_phys(32);
-	else
-		arm64_dma32_phys_limit = PHYS_MASK + 1;
-
 	reserve_elfcorehdr();
 
 	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
-
-	dma_contiguous_reserve(arm64_dma32_phys_limit);
 }
 
 void __init bootmem_init(void)
@@ -438,6 +436,11 @@ void __init bootmem_init(void)
 	sparse_init();
 	zone_sizes_init(min, max);
 
+	/*
+	 * Reserve the CMA area after arm64_dma_phys_limit was initialised.
+	 */
+	dma_contiguous_reserve(arm64_dma_phys_limit);
+
 	/*
 	 * request_standard_resources() depends on crashkernel's memory being
 	 * reserved, so do it here.
@@ -455,7 +458,7 @@ void __init bootmem_init(void)
 void __init mem_init(void)
 {
 	if (swiotlb_force == SWIOTLB_FORCE ||
-	    max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
+	    max_pfn > PFN_DOWN(arm64_dma_phys_limit))
 		swiotlb_init(1);
 	else
 		swiotlb_force = SWIOTLB_NO_FORCE;
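
Taken together, the zone_sizes_init() changes above guarantee that arm64_dma_phys_limit is always initialised — to the ZONE_DMA end if that zone exists, else the ZONE_DMA32 end, else all of memory — which is what lets the crashkernel, CMA and swiotlb paths key off it unconditionally. Condensed into a standalone sketch (the real code interleaves this with the max_zone_pfns[] setup):

/* Sketch: how arm64_dma_phys_limit ends up being chosen after this patch. */
static phys_addr_t pick_dma_phys_limit(bool have_zone_dma, bool have_zone_dma32,
				       phys_addr_t zone_dma_max,
				       phys_addr_t dma32_phys_limit)
{
	phys_addr_t limit = 0;

	if (have_zone_dma)
		limit = zone_dma_max;		/* ZONE_DMA end (at most 4GB) */
	if (!limit && have_zone_dma32)
		limit = dma32_phys_limit;	/* ZONE_DMA32 end (4GB) */
	if (!limit)
		limit = PHYS_MASK + 1;		/* no DMA zones: no low limit */

	return limit;
}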
5 changes: 0 additions & 5 deletions drivers/perf/arm_pmu.c
@@ -726,11 +726,6 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
 	return per_cpu(hw_events->irq, cpu);
 }
 
-bool arm_pmu_irq_is_nmi(void)
-{
-	return has_nmi;
-}
-
 /*
  * PMU hardware loses all context when a CPU goes offline.
  * When a CPU is hotplugged back in, since some hardware registers are