Skip to content

Commit

Permalink
MIPS: Outline udelay and fix a few issues.
Browse files Browse the repository at this point in the history
Outlining fixes the issue were on certain CPUs such as the R10000 family
the delay loop would need an extra cycle if it overlaps a cacheline
boundary.

The rewrite also fixes build errors with GCC 4.4 which was changed in
way incompatible with the kernel's inline assembly.

Relying on pure C for computation of the delay value removes the need for
explicit.  The price we pay is a slight slowdown of the computation - to
be fixed on another day.

Signed-off-by: Ralf Baechle <[email protected]>
  • Loading branch information
ralfbaechle committed Jun 8, 2009
1 parent 3a55314 commit 5636919
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 92 deletions.
4 changes: 2 additions & 2 deletions arch/mips/include/asm/cpu-info.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ struct cache_desc {
#define MIPS_CACHE_PINDEX 0x00000020 /* Physically indexed cache */

struct cpuinfo_mips {
unsigned long udelay_val;
unsigned long asid_cache;
unsigned int udelay_val;
unsigned int asid_cache;

/*
* Capability and feature descriptor structure for MIPS CPU
Expand Down
92 changes: 5 additions & 87 deletions arch/mips/include/asm/delay.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,94 +11,12 @@
#ifndef _ASM_DELAY_H
#define _ASM_DELAY_H

#include <linux/param.h>
#include <linux/smp.h>
extern void __delay(unsigned int loops);
extern void __ndelay(unsigned int ns);
extern void __udelay(unsigned int us);

#include <asm/compiler.h>
#include <asm/war.h>

static inline void __delay(unsigned long loops)
{
if (sizeof(long) == 4)
__asm__ __volatile__ (
" .set noreorder \n"
" .align 3 \n"
"1: bnez %0, 1b \n"
" subu %0, 1 \n"
" .set reorder \n"
: "=r" (loops)
: "0" (loops));
else if (sizeof(long) == 8 && !DADDI_WAR)
__asm__ __volatile__ (
" .set noreorder \n"
" .align 3 \n"
"1: bnez %0, 1b \n"
" dsubu %0, 1 \n"
" .set reorder \n"
: "=r" (loops)
: "0" (loops));
else if (sizeof(long) == 8 && DADDI_WAR)
__asm__ __volatile__ (
" .set noreorder \n"
" .align 3 \n"
"1: bnez %0, 1b \n"
" dsubu %0, %2 \n"
" .set reorder \n"
: "=r" (loops)
: "0" (loops), "r" (1));
}


/*
* Division by multiplication: you don't have to worry about
* loss of precision.
*
* Use only for very small delays ( < 1 msec). Should probably use a
* lookup table, really, as the multiplications take much too long with
* short delays. This is a "reasonable" implementation, though (and the
* first constant multiplications gets optimized away if the delay is
* a constant)
*/

static inline void __udelay(unsigned long usecs, unsigned long lpj)
{
unsigned long hi, lo;

/*
* The rates of 128 is rounded wrongly by the catchall case
* for 64-bit. Excessive precission? Probably ...
*/
#if defined(CONFIG_64BIT) && (HZ == 128)
usecs *= 0x0008637bd05af6c7UL; /* 2**64 / (1000000 / HZ) */
#elif defined(CONFIG_64BIT)
usecs *= (0x8000000000000000UL / (500000 / HZ));
#else /* 32-bit junk follows here */
usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) +
0x80000000ULL) >> 32);
#endif

if (sizeof(long) == 4)
__asm__("multu\t%2, %3"
: "=h" (usecs), "=l" (lo)
: "r" (usecs), "r" (lpj)
: GCC_REG_ACCUM);
else if (sizeof(long) == 8 && !R4000_WAR)
__asm__("dmultu\t%2, %3"
: "=h" (usecs), "=l" (lo)
: "r" (usecs), "r" (lpj)
: GCC_REG_ACCUM);
else if (sizeof(long) == 8 && R4000_WAR)
__asm__("dmultu\t%3, %4\n\tmfhi\t%0"
: "=r" (usecs), "=h" (hi), "=l" (lo)
: "r" (usecs), "r" (lpj)
: GCC_REG_ACCUM);

__delay(usecs);
}

#define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val

#define udelay(usecs) __udelay((usecs), __udelay_val)
#define ndelay(ns) __udelay(ns)
#define udelay(us) __udelay(us)

/* make sure "usecs *= ..." in udelay do not overflow. */
#if HZ >= 1000
Expand Down
2 changes: 1 addition & 1 deletion arch/mips/kernel/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, fmt, __cpu_name[n],
(version >> 4) & 0x0f, version & 0x0f,
(fp_vers >> 4) & 0x0f, fp_vers & 0x0f);
seq_printf(m, "BogoMIPS\t\t: %lu.%02lu\n",
seq_printf(m, "BogoMIPS\t\t: %u.%02u\n",
cpu_data[n].udelay_val / (500000/HZ),
(cpu_data[n].udelay_val / (5000/HZ)) % 100);
seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no");
Expand Down
4 changes: 2 additions & 2 deletions arch/mips/lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# Makefile for MIPS-specific library files..
#

lib-y += csum_partial.o memcpy.o memcpy-inatomic.o memset.o strlen_user.o \
strncpy_user.o strnlen_user.o uncached.o
lib-y += csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \
strlen_user.o strncpy_user.o strnlen_user.o uncached.o

obj-y += iomap.o
obj-$(CONFIG_PCI) += iomap-pci.o
Expand Down
56 changes: 56 additions & 0 deletions arch/mips/lib/delay.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1994 by Waldorf Electronics
* Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2007 Maciej W. Rozycki
*/
#include <linux/module.h>
#include <linux/param.h>
#include <linux/smp.h>

#include <asm/compiler.h>
#include <asm/war.h>

inline void __delay(unsigned int loops)
{
__asm__ __volatile__ (
" .set noreorder \n"
" .align 3 \n"
"1: bnez %0, 1b \n"
" subu %0, 1 \n"
" .set reorder \n"
: "=r" (loops)
: "0" (loops));
}
EXPORT_SYMBOL(__delay);

/*
* Division by multiplication: you don't have to worry about
* loss of precision.
*
* Use only for very small delays ( < 1 msec). Should probably use a
* lookup table, really, as the multiplications take much too long with
* short delays. This is a "reasonable" implementation, though (and the
* first constant multiplications gets optimized away if the delay is
* a constant)
*/

void __udelay(unsigned long us)
{
unsigned int lpj = current_cpu_data.udelay_val;

__delay((us * 0x000010c7 * HZ * lpj) >> 32);
}
EXPORT_SYMBOL(__udelay);

void __ndelay(unsigned long ns)
{
unsigned int lpj = current_cpu_data.udelay_val;

__delay((us * 0x00000005 * HZ * lpj) >> 32);
}
EXPORT_SYMBOL(__ndelay);

0 comments on commit 5636919

Please sign in to comment.