x86, spinlock: Replace pv spinlocks with pv ticketlocks
Rather than outright replacing the entire spinlock implementation in
order to paravirtualize it, keep the ticket lock implementation but add
a couple of pvops hooks on the slow path (long spin on lock, unlocking
a contended lock).

Ticket locks have a number of nice properties, but they also have some
surprising behaviours in virtual environments.  They enforce a strict
FIFO ordering on cpus trying to take a lock; however, if the hypervisor
scheduler does not schedule the cpus in the correct order, the system can
waste a huge amount of time spinning until the next cpu can take the lock.

(See Thomas Friebel's talk "Prevent Guests from Spinning Around"
http://www.xen.org/files/xensummitboston08/LHP.pdf for more details.)

To address this, we add two hooks:
 - lock_spinning, reached via __ticket_lock_spinning() after the cpu has
   been spinning on the lock for a significant number of iterations but
   has failed to take the lock (presumably because the cpu holding the
   lock has been descheduled).  The lock_spinning pvop is expected to
   block the cpu until it has been kicked by the current lock holder.
 - unlock_kick, reached via __ticket_unlock_kick() on releasing a
   contended lock (there are more cpus with tail tickets); it checks
   whether the next cpu is blocked and wakes it if so.

When CONFIG_PARAVIRT_SPINLOCKS is disabled, a set of stub functions
makes all the extra code compile away.
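
For reference, the resulting lock/unlock paths look like this (consolidated
here from the arch/x86/include/asm/spinlock.h diff below):

static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock)
{
	register struct __raw_tickets inc = { .tail = 1 };

	inc = xadd(&lock->tickets, inc);

	for (;;) {
		unsigned count = SPIN_THRESHOLD;

		do {
			if (inc.head == inc.tail)
				goto out;
			cpu_relax();
			inc.head = ACCESS_ONCE(lock->tickets.head);
		} while (--count);
		__ticket_lock_spinning(lock, inc.tail);
	}
out:	barrier();	/* make sure nothing creeps before the lock is taken */
}

static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
	__ticket_t next = lock->tickets.head + 1;

	__add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
	__ticket_unlock_kick(lock, next);
}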

Results:
=======
setup: 32-core machine (HT off), 32-vcpu KVM guest, 8GB RAM
base = 3.11-rc
patched = base + pvspinlock V12

+-----------------+----------------+--------+
 dbench (Throughput in MB/sec. Higher is better)
+-----------------+----------------+--------+
|   base (stdev %)|patched(stdev%) | %gain  |
+-----------------+----------------+--------+
| 15035.3   (0.3) |15150.0   (0.6) |   0.8  |
|  1470.0   (2.2) | 1713.7   (1.9) |  16.6  |
|   848.6   (4.3) |  967.8   (4.3) |  14.0  |
|   652.9   (3.5) |  685.3   (3.7) |   5.0  |
+-----------------+----------------+--------+

pvspinlock shows benefits for overcommit ratios > 1 in the PLE-enabled
cases, and undercommit results are flat.

Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Link: http://lkml.kernel.org/r/1376058122-8248-2-git-send-email-raghavendra.kt@linux.vnet.ibm.com
Reviewed-by: Konrad Rzeszutek Wilk <[email protected]>
Tested-by: Attilio Rao <[email protected]>
[ Raghavendra: Changed SPIN_THRESHOLD, fixed redefinition of arch_spinlock_t]
Signed-off-by: Raghavendra K T <[email protected]>
Acked-by: Ingo Molnar <[email protected]>
Signed-off-by: H. Peter Anvin <[email protected]>
jsgf authored and H. Peter Anvin committed Aug 9, 2013
1 parent c095ba7 · commit 545ac13
Showing 6 changed files with 65 additions and 61 deletions.
32 changes: 6 additions & 26 deletions arch/x86/include/asm/paravirt.h
@@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,

 #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
 
-static inline int arch_spin_is_locked(struct arch_spinlock *lock)
+static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
+							__ticket_t ticket)
 {
-	return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
+	PVOP_VCALL2(pv_lock_ops.lock_spinning, lock, ticket);
 }
 
-static inline int arch_spin_is_contended(struct arch_spinlock *lock)
+static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock,
+							__ticket_t ticket)
 {
-	return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
-}
-#define arch_spin_is_contended	arch_spin_is_contended
-
-static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
-{
-	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
-}
-
-static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
-						  unsigned long flags)
-{
-	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
-}
-
-static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
-{
-	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
-}
-
-static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
-{
-	PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
+	PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
 }
 
 #endif
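Aside: PVOP_VCALL2() emits a patchable call through the pv_lock_ops table.
As a simplification (not the macro's literal expansion), the first wrapper
above behaves like:

static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
						   __ticket_t ticket)
{
	/* simplified view: an indirect call through the ops table */
	pv_lock_ops.lock_spinning(lock, ticket);
}
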
14 changes: 8 additions & 6 deletions arch/x86/include/asm/paravirt_types.h
@@ -327,13 +327,15 @@ struct pv_mmu_ops {
 };
 
 struct arch_spinlock;
+#ifdef CONFIG_SMP
+#include <asm/spinlock_types.h>
+#else
+typedef u16 __ticket_t;
+#endif
 
 struct pv_lock_ops {
-	int (*spin_is_locked)(struct arch_spinlock *lock);
-	int (*spin_is_contended)(struct arch_spinlock *lock);
-	void (*spin_lock)(struct arch_spinlock *lock);
-	void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
-	int (*spin_trylock)(struct arch_spinlock *lock);
-	void (*spin_unlock)(struct arch_spinlock *lock);
+	void (*lock_spinning)(struct arch_spinlock *lock, __ticket_t ticket);
+	void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
 };
 
 /* This contains all the paravirt structures: we get a convenient
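A hypervisor backend enables PV ticketlocks by filling in these two ops.
A minimal sketch of such a backend follows; the example_* names and the
hv_block_self()/hv_kick_cpu() primitives are hypothetical stand-ins for
hypervisor-specific block/wake calls, not anything this patch adds:

struct example_waiting {
	struct arch_spinlock *lock;	/* lock this cpu is blocked on */
	__ticket_t want;		/* ticket this cpu is waiting for */
};
static DEFINE_PER_CPU(struct example_waiting, example_waiting);

static void example_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
{
	struct example_waiting *w = &__get_cpu_var(example_waiting);

	w->want = want;
	smp_wmb();
	w->lock = lock;			/* publish, then sleep until kicked */

	hv_block_self();		/* hypothetical: halt until woken */

	w->lock = NULL;
}

static void example_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
{
	int cpu;

	for_each_online_cpu(cpu) {
		struct example_waiting *w = &per_cpu(example_waiting, cpu);

		if (ACCESS_ONCE(w->lock) == lock && w->want == next) {
			hv_kick_cpu(cpu);	/* hypothetical: wake that cpu */
			break;
		}
	}
}

void __init example_init_spinlocks(void)
{
	pv_lock_ops.lock_spinning = example_lock_spinning;
	pv_lock_ops.unlock_kick = example_unlock_kick;
}

A real backend must also recheck lock->tickets.head after publishing
w->lock, so a kick that races with going to sleep is not lost; the Xen and
KVM implementations later in this series do exactly that.
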
53 changes: 43 additions & 10 deletions arch/x86/include/asm/spinlock.h
@@ -34,6 +34,35 @@
 # define UNLOCK_LOCK_PREFIX
 #endif
 
+/* How long a lock should spin before we consider blocking */
+#define SPIN_THRESHOLD	(1 << 15)
+
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+
+static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
+							__ticket_t ticket)
+{
+}
+
+static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock,
+							__ticket_t ticket)
+{
+}
+
+#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
+
+
+/*
+ * If a spinlock has someone waiting on it, then kick the appropriate
+ * waiting cpu.
+ */
+static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
+							__ticket_t next)
+{
+	if (unlikely(lock->tickets.tail != next))
+		____ticket_unlock_kick(lock, next);
+}
+
 /*
  * Ticket locks are conceptually two parts, one indicating the current head of
  * the queue, and the other indicating the current tail. The lock is acquired
@@ -47,19 +76,24 @@
  * in the high part, because a wide xadd increment of the low part would carry
  * up and contaminate the high part.
  */
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock)
 {
 	register struct __raw_tickets inc = { .tail = 1 };
 
 	inc = xadd(&lock->tickets, inc);
 
 	for (;;) {
-		if (inc.head == inc.tail)
-			break;
-		cpu_relax();
-		inc.head = ACCESS_ONCE(lock->tickets.head);
+		unsigned count = SPIN_THRESHOLD;
+
+		do {
+			if (inc.head == inc.tail)
+				goto out;
+			cpu_relax();
+			inc.head = ACCESS_ONCE(lock->tickets.head);
+		} while (--count);
+		__ticket_lock_spinning(lock, inc.tail);
 	}
-	barrier();		/* make sure nothing creeps before the lock is taken */
+out:	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
@@ -78,7 +112,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)

 static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
 {
+	__ticket_t next = lock->tickets.head + 1;
+
 	__add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
+	__ticket_unlock_kick(lock, next);
 }
 
 static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
@@ -95,8 +132,6 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
 	return (__ticket_t)(tmp.tail - tmp.head) > 1;
 }
 
-#ifndef CONFIG_PARAVIRT_SPINLOCKS
-
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	return __ticket_spin_is_locked(lock);
@@ -129,8 +164,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 	arch_spin_lock(lock);
 }
 
-#endif	/* CONFIG_PARAVIRT_SPINLOCKS */
-
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
 	while (arch_spin_is_locked(lock))
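To make the shape of the algorithm easy to play with outside the kernel,
here is an illustrative stand-alone C11 translation. Everything in it (the
names, the threshold, the yield-based slow path) is a demo-only
approximation of the pvops mechanism above:

#include <pthread.h>
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define SPIN_THRESHOLD (1 << 15)

struct ticketlock {
	_Atomic uint16_t head;		/* ticket currently being served */
	_Atomic uint16_t tail;		/* next ticket to hand out */
};

/* Demo stand-ins for the lock_spinning/unlock_kick pvops. */
static void lock_spinning(struct ticketlock *l, uint16_t t) { sched_yield(); }
static void unlock_kick(struct ticketlock *l, uint16_t t) { }

static void ticket_lock(struct ticketlock *l)
{
	uint16_t me = atomic_fetch_add(&l->tail, 1);	/* grab a ticket */

	for (;;) {
		unsigned count = SPIN_THRESHOLD;

		do {
			if (atomic_load(&l->head) == me)
				return;		/* our turn: lock acquired */
		} while (--count);
		lock_spinning(l, me);	/* spun too long: ask to be blocked */
	}
}

static void ticket_unlock(struct ticketlock *l)
{
	/* Capture the ticket being handed the lock before publishing it. */
	uint16_t next = atomic_load(&l->head) + 1;

	atomic_store(&l->head, next);
	if (atomic_load(&l->tail) != next)	/* still contended: kick */
		unlock_kick(l, next);
}

static struct ticketlock demo_lock;
static long counter;

static void *worker(void *arg)
{
	for (int i = 0; i < 100000; i++) {
		ticket_lock(&demo_lock);
		counter++;
		ticket_unlock(&demo_lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("counter = %ld (expect 400000)\n", counter);
	return 0;
}

As in the kernel version, ticket_unlock() computes next before releasing
the lock: once head is published another cpu can immediately acquire and
release the lock, so the kick must target the value captured beforehand.
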
4 changes: 0 additions & 4 deletions arch/x86/include/asm/spinlock_types.h
@@ -1,10 +1,6 @@
 #ifndef _ASM_X86_SPINLOCK_TYPES_H
 #define _ASM_X86_SPINLOCK_TYPES_H
 
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
-#endif
-
 #include <linux/types.h>
 
 #if (CONFIG_NR_CPUS < 256)
15 changes: 2 additions & 13 deletions arch/x86/kernel/paravirt-spinlocks.c
@@ -7,21 +7,10 @@

 #include <asm/paravirt.h>
 
-static inline void
-default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-	arch_spin_lock(lock);
-}
-
 struct pv_lock_ops pv_lock_ops = {
 #ifdef CONFIG_SMP
-	.spin_is_locked = __ticket_spin_is_locked,
-	.spin_is_contended = __ticket_spin_is_contended,
-
-	.spin_lock = __ticket_spin_lock,
-	.spin_lock_flags = default_spin_lock_flags,
-	.spin_trylock = __ticket_spin_trylock,
-	.spin_unlock = __ticket_spin_unlock,
+	.lock_spinning = paravirt_nop,
+	.unlock_kick = paravirt_nop,
 #endif
 };
 EXPORT_SYMBOL(pv_lock_ops);
8 changes: 6 additions & 2 deletions arch/x86/xen/spinlock.c
@@ -139,6 +139,9 @@ struct xen_spinlock {
 	xen_spinners_t spinners;	/* count of waiting cpus */
 };
 
+static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
+
+#if 0
 static int xen_spin_is_locked(struct arch_spinlock *lock)
 {
 	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
@@ -167,7 +170,6 @@ static int xen_spin_trylock(struct arch_spinlock *lock)
 }
 
 static DEFINE_PER_CPU(char *, irq_name);
-static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
 static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
 
 /*
@@ -354,6 +356,7 @@ static void xen_spin_unlock(struct arch_spinlock *lock)
 	if (unlikely(xl->spinners))
 		xen_spin_unlock_slow(xl);
 }
+#endif
 
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
@@ -418,13 +421,14 @@ void __init xen_init_spinlocks(void)
 		return;
 
 	BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
-
+#if 0
 	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
 	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
 	pv_lock_ops.spin_lock = xen_spin_lock;
 	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
 	pv_lock_ops.spin_trylock = xen_spin_trylock;
 	pv_lock_ops.spin_unlock = xen_spin_unlock;
+#endif
 }

#ifdef CONFIG_XEN_DEBUG_FS
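Note that this patch only parks the old Xen byte-lock implementation behind
#if 0; it does not yet use the new hooks. The follow-up Xen patch in this
series replaces the #if 0 code with a lock_spinning handler that sleeps on
the per-cpu lock_kicker_irq (via xen_poll_irq()) and an unlock_kick handler
that IPIs the waiting cpu, then registers them roughly like this (a sketch
of the follow-up, not part of this commit):

void __init xen_init_spinlocks(void)
{
	...
	pv_lock_ops.lock_spinning = xen_lock_spinning;
	pv_lock_ops.unlock_kick = xen_unlock_kick;
}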
