
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/asm changes from Ingo Molnar

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: Include probe_roms.h in probe_roms.c
  x86/32: Print control and debug registers for kernel context
  x86: Tighten dependencies of CPU_SUP_*_32
  x86/numa: Improve internode cache alignment
  x86: Fix the NMI nesting comments
  x86-64: Improve insn scheduling in SAVE_ARGS_IRQ
  x86-64: Fix CFI annotations for NMI nesting code
  bitops: Add missing parentheses to new get_order macro
  bitops: Optimise get_order()
  bitops: Adjust the comment on get_order() to describe the size==0 case
  x86/spinlocks: Eliminate TICKET_MASK
  x86-64: Handle byte-wise tail copying in memcpy() without a loop
  x86-64: Fix memcpy() to support sizes of 4Gb and above
  x86-64: Fix memset() to support sizes of 4Gb and above
  x86-64: Slightly shorten copy_page()
torvalds committed Mar 22, 2012
2 parents 9521127 + a240ada commit e17fdf5
Showing 10 changed files with 128 additions and 98 deletions.
5 changes: 2 additions & 3 deletions arch/x86/Kconfig.cpu
@@ -303,7 +303,6 @@ config X86_GENERIC
config X86_INTERNODE_CACHE_SHIFT
int
default "12" if X86_VSMP
default "7" if NUMA
default X86_L1_CACHE_SHIFT

config X86_CMPXCHG
@@ -441,7 +440,7 @@ config CPU_SUP_INTEL
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
depends on !64BIT
depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for Cyrix processors

@@ -495,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
config CPU_SUP_UMC_32
default y
bool "Support UMC processors" if PROCESSOR_SELECT
depends on !64BIT
depends on M386 || M486 || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for UMC processors

4 changes: 2 additions & 2 deletions arch/x86/include/asm/spinlock.h
@@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

return !!(tmp.tail ^ tmp.head);
return tmp.tail != tmp.head;
}

static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);

return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
return (__ticket_t)(tmp.tail - tmp.head) > 1;
}

#ifndef CONFIG_PARAVIRT_SPINLOCKS
1 change: 0 additions & 1 deletion arch/x86/include/asm/spinlock_types.h
@@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
#endif

#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))

typedef struct arch_spinlock {
union {
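
The spinlock.h hunk above replaces the masked difference with a plain cast to __ticket_t, which is what lets TICKET_MASK disappear from spinlock_types.h: truncating the subtraction to the ticket type already reduces it modulo 2^TICKET_SHIFT. The is_locked change in the same hunk (tail != head instead of !!(tail ^ head)) is an equivalent but more readable inequality test. Below is a minimal standalone sketch of the contended-test equivalence, assuming the 8-bit ticket case (NR_CPUS < 256) and re-declaring __ticket_t and TICKET_MASK locally; it is illustrative userspace C, not kernel code.

#include <stdio.h>
#include <stdint.h>

typedef uint8_t __ticket_t;               /* 8-bit tickets, as when NR_CPUS < 256 */
#define TICKET_MASK ((__ticket_t)0xff)    /* what the removed define produced for this case */

int main(void)
{
	__ticket_t head = 0xfe;           /* ticket currently being served */
	__ticket_t tail = 0x01;           /* next ticket to hand out, wrapped past 0xff */

	/* Old test: widen to int, subtract, then mask back into ticket range. */
	int old_contended = ((tail - head) & TICKET_MASK) > 1;

	/* New test: the cast alone truncates to 8 bits, giving the same 3. */
	int new_contended = (__ticket_t)(tail - head) > 1;

	printf("old=%d new=%d\n", old_contended, new_contended);   /* both print 1 */
	return 0;
}

Both forms print 1 here: with head at 0xfe and tail wrapped around to 0x01, three CPUs hold or wait for the lock, and the 8-bit truncation alone recovers that distance.
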
2 changes: 1 addition & 1 deletion arch/x86/kernel/dumpstack_32.c
@@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
int i;

print_modules();
__show_regs(regs, 0);
__show_regs(regs, !user_mode_vm(regs));

printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
TASK_COMM_LEN, current->comm, task_pid_nr(current),
71 changes: 38 additions & 33 deletions arch/x86/kernel/entry_64.S
@@ -320,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
movq %rsp, %rsi

leaq -RBP(%rsp),%rdi /* arg1 for handler */
testl $3, CS(%rdi)
testl $3, CS-RBP(%rsi)
je 1f
SWAPGS
/*
@@ -330,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
* moving irq_enter into assembly, which would be too much work)
*/
1: incl PER_CPU_VAR(irq_count)
jne 2f
mov PER_CPU_VAR(irq_stack_ptr),%rsp
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
CFI_DEF_CFA_REGISTER rsi

2: /* Store previous stack value */
/* Store previous stack value */
pushq %rsi
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
0x77 /* DW_OP_breg7 */, 0, \
@@ -1530,6 +1529,7 @@ ENTRY(nmi)

/* Use %rdx as out temp variable throughout */
pushq_cfi %rdx
CFI_REL_OFFSET rdx, 0

/*
* If %cs was not the kernel segment, then the NMI triggered in user
@@ -1554,6 +1554,7 @@ ENTRY(nmi)
*/
lea 6*8(%rsp), %rdx
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
CFI_REMEMBER_STATE

nested_nmi:
/*
@@ -1585,10 +1586,12 @@ nested_nmi:

nested_nmi_out:
popq_cfi %rdx
CFI_RESTORE rdx

/* No need to check faults here */
INTERRUPT_RETURN

CFI_RESTORE_STATE
first_nmi:
/*
* Because nested NMIs will use the pushed location that we
@@ -1620,33 +1623,55 @@ first_nmi:
* | pt_regs |
* +-------------------------+
*
* The saved RIP is used to fix up the copied RIP that a nested
* NMI may zero out. The original stack frame and the temp storage
* The saved stack frame is used to fix up the copied stack frame
* that a nested NMI may change to make the interrupted NMI iret jump
* to the repeat_nmi. The original stack frame and the temp storage
* is also used by nested NMIs and can not be trusted on exit.
*/
/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
movq (%rsp), %rdx
CFI_RESTORE rdx

/* Set the NMI executing variable on the stack. */
pushq_cfi $1

/* Copy the stack frame to the Saved frame */
.rept 5
pushq_cfi 6*8(%rsp)
.endr
CFI_DEF_CFA_OFFSET SS+8-RIP

/* Everything up to here is safe from nested NMIs */

/*
* If there was a nested NMI, the first NMI's iret will return
* here. But NMIs are still enabled and we can take another
* nested NMI. The nested NMI checks the interrupted RIP to see
* if it is between repeat_nmi and end_repeat_nmi, and if so
* it will just return, as we are about to repeat an NMI anyway.
* This makes it safe to copy to the stack frame that a nested
* NMI will update.
*/
repeat_nmi:
/*
* Update the stack variable to say we are still in NMI (the update
* is benign for the non-repeat case, where 1 was pushed just above
* to this very stack slot).
*/
movq $1, 5*8(%rsp)

/* Make another copy, this one may be modified by nested NMIs */
.rept 5
pushq_cfi 4*8(%rsp)
.endr

/* Do not pop rdx, nested NMIs will corrupt it */
movq 11*8(%rsp), %rdx
CFI_DEF_CFA_OFFSET SS+8-RIP
end_repeat_nmi:

/*
* Everything below this point can be preempted by a nested
* NMI if the first NMI took an exception. Repeated NMIs
* caused by an exception and nested NMI will start here, and
* can still be preempted by another NMI.
* NMI if the first NMI took an exception and reset our iret stack
* so that we repeat another NMI.
*/
restart_nmi:
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@@ -1675,26 +1700,6 @@ nmi_restore:
CFI_ENDPROC
END(nmi)

/*
* If an NMI hit an iret because of an exception or breakpoint,
* it can lose its NMI context, and a nested NMI may come in.
* In that case, the nested NMI will change the preempted NMI's
* stack to jump to here when it does the final iret.
*/
repeat_nmi:
INTR_FRAME
/* Update the stack variable to say we are still in NMI */
movq $1, 5*8(%rsp)

/* copy the saved stack back to copy stack */
.rept 5
pushq_cfi 4*8(%rsp)
.endr

jmp restart_nmi
CFI_ENDPROC
end_repeat_nmi:

ENTRY(ignore_sysret)
CFI_STARTPROC
mov $-ENOSYS,%eax
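
The NMI changes above are easier to follow with the control flow written out in C. The sketch below is illustrative only: the real logic is the assembly above, which works directly on the saved iret frames, and nmi_state, interrupted_nmi_stack, in_repeat_range, repeat_nmi_entry and handle_nmi are hypothetical stand-ins for the on-stack variable and frame copies, the NMI-stack test done by test_in_nmi, the repeat_nmi..end_repeat_nmi range check, the repeat_nmi label and the C-level handler.

#include <stdbool.h>

struct iret_frame { unsigned long rip, cs, rflags, rsp, ss; };

struct nmi_state {                        /* lives on the NMI stack in the real code */
	bool nmi_executing;               /* the "$1" pushed above the copied frame */
	struct iret_frame saved;          /* pristine copy of the original iret frame */
	struct iret_frame copy;           /* working copy a nested NMI may rewrite */
};

static unsigned long repeat_nmi_entry;                                            /* stands in for the repeat_nmi label */
static bool interrupted_nmi_stack(unsigned long rsp) { (void)rsp; return false; } /* stub for the test_in_nmi check */
static bool in_repeat_range(unsigned long rip) { (void)rip; return false; }       /* stub: repeat_nmi..end_repeat_nmi */
static void handle_nmi(void) { }                                                  /* stub for the C-level handler */

void nmi_entry(struct iret_frame *frame, struct nmi_state *st)
{
	if (st->nmi_executing || interrupted_nmi_stack(frame->rsp)) {
		/* Nested NMI: the first NMI is still running. */
		if (!in_repeat_range(frame->rip)) {
			/*
			 * Point the preempted NMI's return address at
			 * repeat_nmi so its final iret runs the handler one
			 * more time.  If we interrupted the repeat_nmi fixup
			 * itself, a repeat is already on the way, so leave
			 * the frame alone.
			 */
			st->copy.rip = repeat_nmi_entry;
		}
		return;                   /* iret straight back into the first NMI */
	}

	/* First NMI: publish the state that a nested NMI will inspect. */
	st->nmi_executing = true;
	st->saved = *frame;               /* safe copy, never rewritten */
	st->copy  = *frame;               /* the copy a nested NMI is allowed to edit */
	handle_nmi();
	st->nmi_executing = false;        /* cleared just before the final iret */
}

The repeat itself is not a function call: it happens when the preempted NMI irets through the rewritten copy frame and lands on repeat_nmi, which refreshes the copy and falls through to the handler again.
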
1 change: 1 addition & 0 deletions arch/x86/kernel/probe_roms.c
@@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/export.h>

#include <asm/probe_roms.h>
#include <asm/pci-direct.h>
#include <asm/e820.h>
#include <asm/mmzone.h>
12 changes: 4 additions & 8 deletions arch/x86/lib/copy_page_64.S
@@ -20,14 +20,12 @@ ENDPROC(copy_page_c)

ENTRY(copy_page)
CFI_STARTPROC
subq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET 3*8
subq $2*8,%rsp
CFI_ADJUST_CFA_OFFSET 2*8
movq %rbx,(%rsp)
CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13, 2*8

movl $(4096/64)-5,%ecx
.p2align 4
@@ -91,10 +89,8 @@ ENTRY(copy_page)
CFI_RESTORE rbx
movq 1*8(%rsp),%r12
CFI_RESTORE r12
movq 2*8(%rsp),%r13
CFI_RESTORE r13
addq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET -3*8
addq $2*8,%rsp
CFI_ADJUST_CFA_OFFSET -2*8
ret
.Lcopy_page_end:
CFI_ENDPROC
44 changes: 20 additions & 24 deletions arch/x86/lib/memcpy_64.S
@@ -27,9 +27,8 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c:
movq %rdi, %rax

movl %edx, %ecx
shrl $3, %ecx
movq %rdx, %rcx
shrq $3, %rcx
andl $7, %edx
rep movsq
movl %edx, %ecx
@@ -48,8 +47,7 @@
.section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e:
movq %rdi, %rax

movl %edx, %ecx
movq %rdx, %rcx
rep movsb
ret
.Lmemcpy_e_e:
@@ -60,10 +58,7 @@ ENTRY(memcpy)
CFI_STARTPROC
movq %rdi, %rax

/*
* Use 32bit CMP here to avoid long NOP padding.
*/
cmp $0x20, %edx
cmpq $0x20, %rdx
jb .Lhandle_tail

/*
@@ -72,7 +67,7 @@ ENTRY(memcpy)
*/
cmp %dil, %sil
jl .Lcopy_backward
subl $0x20, %edx
subq $0x20, %rdx
.Lcopy_forward_loop:
subq $0x20, %rdx

@@ -91,7 +86,7 @@ ENTRY(memcpy)
movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop
addq $0x20, %rdx
addl $0x20, %edx
jmp .Lhandle_tail

.Lcopy_backward:
@@ -123,11 +118,11 @@ ENTRY(memcpy)
/*
* Calculate copy position to head.
*/
addq $0x20, %rdx
addl $0x20, %edx
subq %rdx, %rsi
subq %rdx, %rdi
.Lhandle_tail:
cmpq $16, %rdx
cmpl $16, %edx
jb .Lless_16bytes

/*
@@ -144,7 +139,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_16bytes:
cmpq $8, %rdx
cmpl $8, %edx
jb .Lless_8bytes
/*
* Move data from 8 bytes to 15 bytes.
@@ -156,7 +151,7 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_8bytes:
cmpq $4, %rdx
cmpl $4, %edx
jb .Lless_3bytes

/*
@@ -169,18 +164,19 @@ ENTRY(memcpy)
retq
.p2align 4
.Lless_3bytes:
cmpl $0, %edx
je .Lend
subl $1, %edx
jb .Lend
/*
* Move data from 1 bytes to 3 bytes.
*/
.Lloop_1:
movb (%rsi), %r8b
movb %r8b, (%rdi)
incq %rdi
incq %rsi
decl %edx
jnz .Lloop_1
movzbl (%rsi), %ecx
jz .Lstore_1byte
movzbq 1(%rsi), %r8
movzbq (%rsi, %rdx), %r9
movb %r8b, 1(%rdi)
movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
movb %cl, (%rdi)

.Lend:
retq
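
Two things change in memcpy_64.S: the length now stays in the full 64-bit %rdx (the old alternatives moved it through %edx, truncating copies of 4Gb and above), and the final 1..3 byte tail is finished with overlapping byte stores instead of a byte loop. Below is a standalone C sketch of that tail technique; copy_tail_1to3 is a hypothetical helper name used only for illustration, not the kernel's memcpy.

#include <stddef.h>
#include <stdint.h>

void copy_tail_1to3(uint8_t *dst, const uint8_t *src, size_t len)
{
	if (len == 0)                        /* mirrors "subl $1, %edx; jb .Lend" */
		return;

	uint8_t first = src[0];              /* movzbl (%rsi), %ecx */
	if (len > 1) {                       /* len == 1 skips straight to the final store */
		uint8_t second = src[1];         /* movzbq 1(%rsi), %r8 */
		uint8_t last   = src[len - 1];   /* movzbq (%rsi, %rdx), %r9  (%rdx holds len - 1) */
		dst[1]       = second;
		dst[len - 1] = last;             /* same slot as dst[1] when len == 2 */
	}
	dst[0] = first;                      /* .Lstore_1byte: movb %cl, (%rdi) */
}

For a 2-byte tail, second and last refer to the same byte, so the two stores overlap harmlessly; that is the trick that covers lengths 2 and 3 with the same straight-line code, while the early branch mirrors the jz .Lstore_1byte path for a single byte.
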
