Skip to content

Commit

Permalink
Enable SSE code for LBFGS (openmm#4327)
Browse files Browse the repository at this point in the history
* Updated to new version of libLBFGS

* Enable SSE code for LBFGS
  • Loading branch information
peastman authored Nov 30, 2023
1 parent f6e6b6e commit ff3432f
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 47 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
ENDFOREACH(subdir)
IF(X86)
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-DHAVE_SSE2=1")
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/lbfgs/src/lbfgs.cpp PROPERTIES COMPILE_FLAGS "-DUSE_SSE=1 -DHAVE_EMMINTRIN_H=1")
ELSE()
SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS "-UHAVE_SSE2")
ENDIF()
Expand Down
35 changes: 27 additions & 8 deletions libraries/lbfgs/include/lbfgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
* THE SOFTWARE.
*/

/* $Id: lbfgs.h 65 2010-01-29 12:19:16Z naoaki $ */
/* $Id$ */

#ifndef __LBFGS_H__
#define __LBFGS_H__
Expand Down Expand Up @@ -243,7 +243,7 @@ typedef struct {
* (f' - f) / f < \ref delta,
* where f' is the objective value of \ref past iterations ago, and f is
* the objective value of the current iteration.
* The default value is \c 0.
* The default value is \c 1e-5.
*/
lbfgsfloatval_t delta;

Expand All @@ -267,7 +267,7 @@ typedef struct {
/**
* The maximum number of trials for the line search.
* This parameter controls the number of function and gradient evaluations
* per iteration for the line search routine. The default value is \c 20.
* per iteration for the line search routine. The default value is \c 40.
*/
int max_linesearch;

Expand Down Expand Up @@ -313,7 +313,7 @@ typedef struct {
* evaluations are inexpensive with respect to the cost of the
* iteration (which is sometimes the case when solving very large
* problems) it may be advantageous to set this parameter to a small
* value. A typical small value is \c 0.1. This parameter shuold be
* value. A typical small value is \c 0.1. This parameter should be
* greater than the \ref ftol parameter (\c 1e-4) and smaller than
* \c 1.0.
*/
Expand Down Expand Up @@ -525,6 +525,13 @@ lbfgsfloatval_t WINDOWS_EXPORT * lbfgs_malloc(int n);
*/
void WINDOWS_EXPORT lbfgs_free(lbfgsfloatval_t *x);

/**
* Get string description of an lbfgs() return code.
*
* NOTE(review): unlike lbfgs_malloc() and lbfgs_free(), this declaration
* carries no WINDOWS_EXPORT qualifier -- confirm whether it is meant to be
* callable from outside the library on Windows.
*
* @param err A value returned by lbfgs().
* @return A string describing \c err.
*/
const char* lbfgs_strerror(int err);

/** @} */

#ifdef __cplusplus
Expand Down Expand Up @@ -583,7 +590,7 @@ Among the various ports of L-BFGS, this library provides several features:
The library is thread-safe, which is the secondary gain from the callback
interface.
- <b>Cross platform.</b> The source code can be compiled on Microsoft Visual
Studio 2005, GNU C Compiler (gcc), etc.
Studio 2010, GNU C Compiler (gcc), etc.
- <b>Configurable precision</b>: A user can choose single-precision (float)
or double-precision (double) accuracy by changing the ::LBFGS_FLOAT macro.
- <b>SSE/SSE2 optimization</b>:
Expand All @@ -597,17 +604,28 @@ This library is used by:
- <a href="http://www.chokkan.org/software/classias/">Classias: A collection of machine-learning algorithms for classification</a>
- <a href="http://www.public.iastate.edu/~gdancik/mlegp/">mlegp: an R package for maximum likelihood estimates for Gaussian processes</a>
- <a href="http://infmath.uibk.ac.at/~matthiasf/imaging2/">imaging2: the imaging2 class library</a>
- <a href="http://search.cpan.org/~laye/Algorithm-LBFGS-0.16/">Algorithm::LBFGS - Perl extension for L-BFGS</a>
- <a href="http://www.cs.kuleuven.be/~bernd/yap-lbfgs/">YAP-LBFGS (an interface to call libLBFGS from YAP Prolog)</a>
@section download Download
- <a href="http://www.chokkan.org/software/dist/liblbfgs-1.9.tar.gz">Source code</a>
- <a href="https://github.com/downloads/chokkan/liblbfgs/liblbfgs-1.10.tar.gz">Source code</a>
- <a href="https://github.com/chokkan/liblbfgs">GitHub repository</a>
libLBFGS is distributed under the terms of the
<a href="http://opensource.org/licenses/mit-license.php">MIT license</a>.
@section modules Third-party modules
- <a href="http://cran.r-project.org/web/packages/lbfgs/index.html">lbfgs: Limited-memory BFGS Optimization (a wrapper for R)</a> maintained by Antonio Coppola.
- <a href="http://search.cpan.org/~laye/Algorithm-LBFGS-0.16/">Algorithm::LBFGS - Perl extension for L-BFGS</a> maintained by Lei Sun.
- <a href="http://www.cs.kuleuven.be/~bernd/yap-lbfgs/">YAP-LBFGS (an interface to call libLBFGS from YAP Prolog)</a> maintained by Bernd Gutmann.
@section changelog History
- Version 1.10 (2010-12-22):
- Fixed compiling errors on Mac OS X; this patch was kindly submitted by
Nic Schraudolph.
- Reduced compiling warnings on Mac OS X; this patch was kindly submitted
by Tamas Nepusz.
- Replaced memalign() with posix_memalign().
- Updated solution and project files for Microsoft Visual Studio 2010.
- Version 1.9 (2010-01-29):
- Fixed a mistake in checking the validity of the parameters "ftol" and
"wolfe"; this was discovered by Kevin S. Van Horn.
Expand Down Expand Up @@ -728,6 +746,7 @@ Special thanks go to:
- Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about
OWL-QN
- Takashi Imamichi for the useful enhancements of the backtracking method
- Kevin S. Van Horn, Nic Schraudolph, and Tamas Nepusz for bug fixes
Finally I would like to thank the original author, Jorge Nocedal, who has been
distributing the efficient and explanatory implementation in an open source
Expand Down
4 changes: 2 additions & 2 deletions libraries/lbfgs/src/arithmetic_ansi.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
* THE SOFTWARE.
*/

/* $Id: arithmetic_ansi.h 65 2010-01-29 12:19:16Z naoaki $ */
/* $Id$ */

#include <stdlib.h>
#include <string.h>
#include <memory.h>

#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
Expand Down
15 changes: 11 additions & 4 deletions libraries/lbfgs/src/arithmetic_sse_double.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
* THE SOFTWARE.
*/

/* $Id: arithmetic_sse_double.h 65 2010-01-29 12:19:16Z naoaki $ */
/* $Id$ */

#include <stdlib.h>
#ifndef __APPLE__
#include <malloc.h>
#endif
#include <memory.h>

#if 1400 <= _MSC_VER
Expand All @@ -39,10 +41,15 @@

inline static void* vecalloc(size_t size)
{
#ifdef _MSC_VER
#if defined(_WIN32)
void *memblock = _aligned_malloc(size, 16);
#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
void *memblock = malloc(size);
#else
void *memblock = memalign(16, size);
void *memblock = NULL, *p = NULL;
if (posix_memalign(&p, 16, size) == 0) {
memblock = p;
}
#endif
if (memblock != NULL) {
memset(memblock, 0, size);
Expand Down Expand Up @@ -192,7 +199,7 @@ inline static void vecfree(void *memblock)



#if 3 <= __SSE__
#if 3 <= __SSE__ || defined(__SSE3__)
/*
Horizontal add with haddps SSE3 instruction. The work register (rw)
is unused.
Expand Down
19 changes: 17 additions & 2 deletions libraries/lbfgs/src/arithmetic_sse_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
* THE SOFTWARE.
*/

/* $Id: arithmetic_sse_float.h 65 2010-01-29 12:19:16Z naoaki $ */
/* $Id$ */

#include <stdlib.h>
#ifndef __APPLE__
#include <malloc.h>
#endif
#include <memory.h>

#if 1400 <= _MSC_VER
Expand All @@ -45,7 +47,16 @@

inline static void* vecalloc(size_t size)
{
#if defined(_MSC_VER)
void *memblock = _aligned_malloc(size, 16);
#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
void *memblock = malloc(size);
#else
void *memblock = NULL, *p = NULL;
if (posix_memalign(&p, 16, size) == 0) {
memblock = p;
}
#endif
if (memblock != NULL) {
memset(memblock, 0, size);
}
Expand All @@ -54,7 +65,11 @@ inline static void* vecalloc(size_t size)

inline static void vecfree(void *memblock)
{
    /*
     * Release a block obtained from vecalloc(). The deallocator is chosen
     * with the same _MSC_VER test that vecalloc() uses to pick its
     * allocator: _aligned_free() pairs with _aligned_malloc(), and plain
     * free() is used in every other configuration.
     */
#if !defined(_MSC_VER)
    free(memblock);
#else
    _aligned_free(memblock);
#endif
}

#define vecset(x, c, n) \
Expand Down Expand Up @@ -185,7 +200,7 @@ inline static void vecfree(void *memblock)



#if 3 <= __SSE__
#if 3 <= __SSE__ || defined(__SSE3__)
/*
Horizontal add with haddps SSE3 instruction. The work register (rw)
is unused.
Expand Down
Loading

0 comments on commit ff3432f

Please sign in to comment.