Skip to content

Commit

Permalink
Disable SSE2 optimization with --disable-sse2 configure option.
Browse files Browse the repository at this point in the history
Now SSE2 optimization for gcc is enabled by default.
  • Loading branch information
Naoaki Okazaki committed Aug 9, 2011
1 parent 62e201c commit bbd46a6
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 13 deletions.
17 changes: 8 additions & 9 deletions configure.in
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ dnl Checks for profiling mode
dnl ------------------------------------------------------------------
AC_ARG_ENABLE(
profile,
[AS_HELP_STRING([--enable-profile],[Turn on profiling])]
[AS_HELP_STRING([--enable-profile],[turn on profiling])]
)

if test "x$enable_profile" = "xyes"; then
Expand All @@ -103,14 +103,13 @@ fi
dnl ------------------------------------------------------------------
dnl Checks for SSE2 build
dnl ------------------------------------------------------------------
AC_ARG_ENABLE(
sse2,
[AS_HELP_STRING(
[--enable-sse2],
[enable SSE2 optimization routines]
)],
[CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}"]
)
dnl Register the sse2 feature switch; the help text advertises
dnl --disable-sse2 because the check below treats SSE2 as on by default.
AC_ARG_ENABLE([sse2],
AS_HELP_STRING([--disable-sse2], [disable SSE2 optimization routines])
)

dnl Unless $enable_sse2 is exactly "no", prepend -mfpmath=sse, -msse2 and
dnl -DUSE_SSE to CFLAGS; flags already present in CFLAGS are kept after them.
AS_IF([test "x$enable_sse2" != "xno"], [
CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}"
])


dnl ------------------------------------------------------------------
Expand Down
23 changes: 19 additions & 4 deletions lib/crf/src/vecmath.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@
#include <math.h>
#include <memory.h>

#ifdef USE_SSE
#include <emmintrin.h>
#endif/*USE_SSE*/

#ifdef _MSC_VER
#include <malloc.h>
Expand Down Expand Up @@ -169,6 +171,8 @@ inline static floatval_t vecsumlog(floatval_t* x, const int n)
return s;
}

#ifdef USE_SSE

inline static void vecexp(double *values, const int n)
{
int i;
Expand Down Expand Up @@ -202,8 +206,8 @@ inline static void vecexp(double *values, const int n)
/* Load four double values. */
xmm0 = _mm_load_pd(maxlog);
xmm1 = _mm_load_pd(minlog);
x1 = _mm_load_pd(values+i);
x2 = _mm_load_pd(values+i+2);
x1 = _mm_load_pd(values+i);
x2 = _mm_load_pd(values+i+2);
x1 = _mm_min_pd(x1, xmm0);
x2 = _mm_min_pd(x2, xmm0);
x1 = _mm_max_pd(x1, xmm1);
Expand Down Expand Up @@ -317,10 +321,21 @@ inline static void vecexp(double *values, const int n)
a2 = _mm_mul_pd(a2, p2);

/* Store the results. */
_mm_store_pd(values+i, a1);
_mm_store_pd(values+i+2, a2);
_mm_store_pd(values+i, a1);
_mm_store_pd(values+i+2, a2);
}
}

#else

inline static void vecexp(double *values, const int n)
{
int i;
for (i = 0;i < n;++i) {
values[i] = exp(values[i]);
}
}

#endif /*USE_SSE*/

#endif/*__VECMATH_H__*/

0 comments on commit bbd46a6

Please sign in to comment.