From bbd46a6690a516ff545f06429615cf9e15d7097b Mon Sep 17 00:00:00 2001 From: Naoaki Okazaki Date: Tue, 9 Aug 2011 23:01:33 +0900 Subject: [PATCH] Disable SSE2 optimization with --disable-sse2 configure option. Now SSE2 optimization for gcc is enabled by default. --- configure.in | 17 ++++++++--------- lib/crf/src/vecmath.h | 23 +++++++++++++++++++---- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/configure.in b/configure.in index bb18fd28..c446fccb 100644 --- a/configure.in +++ b/configure.in @@ -92,7 +92,7 @@ dnl Checks for profiling mode dnl ------------------------------------------------------------------ AC_ARG_ENABLE( profile, - [AS_HELP_STRING([--enable-profile],[Turn on profiling])] + [AS_HELP_STRING([--enable-profile],[turn on profiling])] ) if test "x$enable_profile" = "xyes"; then @@ -103,14 +103,13 @@ fi dnl ------------------------------------------------------------------ dnl Checks for SSE2 build dnl ------------------------------------------------------------------ -AC_ARG_ENABLE( - sse2, - [AS_HELP_STRING( - [--enable-sse2], - [enable SSE2 optimization routines] - )], - [CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}"] -) +AC_ARG_ENABLE([sse2], + AS_HELP_STRING([--disable-sse2], [disable SSE2 optimization routines]) + ) + +AS_IF([test "x$enable_sse2" != "xno"], [ + CFLAGS="-mfpmath=sse -msse2 -DUSE_SSE ${CFLAGS}" +]) dnl ------------------------------------------------------------------ diff --git a/lib/crf/src/vecmath.h b/lib/crf/src/vecmath.h index 2078e265..2a77b386 100644 --- a/lib/crf/src/vecmath.h +++ b/lib/crf/src/vecmath.h @@ -36,7 +36,9 @@ #include #include +#ifdef USE_SSE #include +#endif/*USE_SSE*/ #ifdef _MSC_VER #include @@ -169,6 +171,8 @@ inline static floatval_t vecsumlog(floatval_t* x, const int n) return s; } +#ifdef USE_SSE + inline static void vecexp(double *values, const int n) { int i; @@ -202,8 +206,8 @@ inline static void vecexp(double *values, const int n) /* Load four double values. */ xmm0 = _mm_load_pd(maxlog); xmm1 = _mm_load_pd(minlog); - x1 = _mm_load_pd(values+i); - x2 = _mm_load_pd(values+i+2); + x1 = _mm_load_pd(values+i); + x2 = _mm_load_pd(values+i+2); x1 = _mm_min_pd(x1, xmm0); x2 = _mm_min_pd(x2, xmm0); x1 = _mm_max_pd(x1, xmm1); @@ -317,10 +321,21 @@ inline static void vecexp(double *values, const int n) a2 = _mm_mul_pd(a2, p2); /* Store the results. */ - _mm_store_pd(values+i, a1); - _mm_store_pd(values+i+2, a2); + _mm_store_pd(values+i, a1); + _mm_store_pd(values+i+2, a2); + } +} + +#else + +inline static void vecexp(double *values, const int n) +{ + int i; + for (i = 0;i < n;++i) { + values[i] = exp(values[i]); } } +#endif /*USE_SSE*/ #endif/*__VECMATH_H__*/