Commit
add lbfgstrainer
Oneplus committed Mar 22, 2013
1 parent eec57ff commit 5004155
Showing 2 changed files with 295 additions and 0 deletions.
219 changes: 219 additions & 0 deletions thirdparty/maxent/lbfgstrainer.cpp
@@ -0,0 +1,219 @@
/*
* vi:ts=4:tw=78:shiftwidth=4:expandtab
* vim600:fdm=marker
*
* lbfgstrainer.cpp - trainer for conditional ME with L-BFGS method
*
* this class utilizes the Fortran implementation of L-BFGS described in
*
* "Limited Memory BFGS Method for Large Scale Optimization"
* by Jorge Nocedal
*
* Copyright (C) 2003 by Zhang Le <[email protected]>
* Begin : 01-Jun-2003
* Last Change : 24-Dec-2004.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <cassert>
#include <cfloat>
#include <cmath>
#include <limits>
#include <algorithm>
#include <boost/timer.hpp>
#include <boost/scoped_array.hpp>
#include "lbfgstrainer.hpp"
#include "display.hpp"
#include "finite.h"
#include "lbfgs.h"

namespace maxent{
using namespace std;

void LBFGSTrainer::init_trainer() {
assert(m_params->size() > 0);
assert(m_es->size() > 0);

m_observed_expects.reset(new double[m_n_theta]);
std::fill(m_observed_expects.get(), m_observed_expects.get() + m_n_theta, 0.0);

m_N = 0;
// calculate observed feature expectations
// feat_sum maps each <pred, outcome> feature pair that occurred in the
// event list to the sum of its active feature values f_i(a,b) over the
// training set
typedef hash_map <pair<size_t, size_t>, float, featid_hasher> FeatSumMap;
FeatSumMap feat_sum;
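// In ME terms this accumulates the (unnormalized) observed expectation
// of each feature:
//   E~[f_i] = sum over events (x,y) of count(x,y) * f_i(x,y)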
for (vector<Event>::const_iterator it = m_es->begin();
it != m_es->end(); ++it) {
size_t len = it->context_size();
for (size_t i = 0; i < len; ++i) {
feat_sum[make_pair(it->m_context[i].first,it->m_outcome)] +=
it->m_count * it->m_context[i].second;
}
m_N += it->m_count;
}

// Get the observed expectations of the features. Strictly speaking,
// we should divide the counts by the number of tokens, but because of
// the way the model's expectations are approximated in the
// implementation, this cancels out when we compute the next iteration
// of a parameter, making the extra divisions wasteful.

FeatSumMap::iterator it;
for (size_t pid = 0; pid < m_params->size(); ++pid) {
vector<pair<size_t, size_t> >& param = (*m_params)[pid];
for (size_t j = 0; j < param.size(); ++j) {
it = feat_sum.find(make_pair(pid,param[j].first));
assert(it != feat_sum.end());
if (it == feat_sum.end())
throw runtime_error("broken training data: some <pid, oid> in params not found in training data");

m_observed_expects[param[j].second] = -(it->second);
}
}

}

// test accuracy of current model on heldout events
double LBFGSTrainer::heldout_accuracy() const {
size_t correct = 0;
size_t total = 0;
vector<double> q(m_n_outcomes); // q(y|x)
for (vector<Event>::iterator it = m_heldout_es->begin();
it != m_heldout_es->end(); ++it) {
total += it->m_count;
size_t best_oid = eval(it->m_context,it->context_size(), q);
if (best_oid == it->m_outcome)
correct += it->m_count;
}
return double(correct)/total;
}

void LBFGSTrainer::train(size_t iter, double eps) {
if (!m_params || !m_es)
throw runtime_error("Can not train on an empty model");

init_trainer();

const double LOG_ZERO = log(DBL_MIN); // floor for log q(y|x) when q underflows to 0
vector<double> q(m_n_outcomes); // q(y|x)
int n = m_n_theta; // number of model parameters
int m = 5; // number of L-BFGS corrections kept (values of 3-7 are typical)
boost::scoped_array<double> grad(new double[n]);
double* g = grad.get();
double* x = m_theta.get();
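// start from theta = 0, which makes the initial model q(y|x) uniform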
fill(x, x+n, 0.0);

double f = 0.0;
size_t correct;
double heldout_acc = -1.0;
boost::timer t;

lbfgs_t* opt = lbfgs_create(n, m, eps);
if (!opt)
throw runtime_error("fail to initlize L-BFGS optimizer");

display("");
display("Starting L-BFGS iterations...");
display("Number of Predicates: %d", m_params->size());
display("Number of Outcomes: %d", m_n_outcomes);
display("Number of Parameters: %d", n);
display("Number of Corrections: %d", m);
display("Tolerance: %E", eps);
display("Gaussian Penalty: %s", (m_sigma2?"on":"off"));
#if defined(NDEBUG)
display("Optimized version");
#endif

display("iter eval loglikelihood training accuracy heldout accuracy");
display("==================================================================");

while (opt->niter < (int)iter) {
// calculate log-likelihood and gradient
correct = 0;
f = 0.0;
std::copy(m_observed_expects.get(), m_observed_expects.get() + n, g);
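// The quantity minimized is the negative log-likelihood
//   f = -sum over events (x,y) of count(x,y) * log q(y|x)
// and its gradient with respect to theta_k is
//   g_k = E_q[f_k] - E~[f_k]
// g has just been seeded with -E~[f_k]; the model expectation E_q[f_k]
// is accumulated over the events below.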

for (vector<Event>::iterator it = m_es->begin();
it != m_es->end(); ++it) {
size_t best_oid = eval(it->m_context, it->context_size(), q); // fills q(y|x)
if (best_oid == it->m_outcome)
correct += it->m_count;
for (size_t i = 0; i < it->context_size(); ++i) {
size_t pid = it->m_context[i].first;
float fval = it->m_context[i].second;
vector<pair<size_t, size_t> >& param = (*m_params)[pid];
for (size_t j = 0; j < param.size(); ++j) {
size_t oid = param[j].first;
size_t fid = param[j].second;
g[fid] += it->m_count * q[oid] * fval;
}
}

assert(finite(q[it->m_outcome]));
double lp = log(q[it->m_outcome]); // named lp to avoid shadowing the timer t
if (finite(lp))
f -= it->m_count * lp;
else
f -= it->m_count * LOG_ZERO;
}
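
// A Gaussian prior N(0, sigma_i^2) on each parameter adds
// x_i^2 / (2 * sigma_i^2) to f and x_i / sigma_i^2 to g_i, which is
// what the if-block below applies when m_sigma2 is set.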

if (m_sigma2) { // applying the Gaussian penalty
for (size_t i = 0; i < m_n_theta; ++i) {
double penalty = x[i]/m_sigma2[i];
g[i] += penalty;
f += (penalty * x[i]) / 2;
}
}

if (m_heldout_es && m_heldout_es->size() > 0)
heldout_acc = heldout_accuracy();

int iflag = lbfgs_run(opt, x, &f, g);

if (iflag < 0) {
lbfgs_destory(opt);
throw runtime_error("lbfgs routine stopped with an error");
} else if (iflag == 0) {
display("Training terminated successfully in %.2f seconds", t.elapsed());
break;
} else {
// continue evaluations
double acc = correct/double(m_N);
if (m_heldout_es && m_heldout_es->size() > 0) {
display("%3d %3d\t%E\t %.3f%%\t %.3f%%" , opt->niter ,
opt->nfuns , (-f/m_N) , (acc*100) , (heldout_acc * 100));
} else {
display("%3d %3d\t%E\t %.3f%%\t %s", opt->niter, opt->nfuns ,
(-f/m_N) , (acc*100) , "N/A");
}
}
}

if (opt->niter >= (int)iter)
display("Maximum numbers of %d iterations reached in %.2f seconds", iter
, t.elapsed());
display("Highest log-likelihood: %E", (-f/m_N));

lbfgs_destory(opt);
}

} // namespace maxent

76 changes: 76 additions & 0 deletions thirdparty/maxent/lbfgstrainer.hpp
@@ -0,0 +1,76 @@
/*
* vi:ts=4:tw=78:shiftwidth=4:expandtab
* vim600:fdm=marker
*
* lbfgstrainer.hpp - a conditional ME trainer using Limited Memory BFGS
* algorithm
*
* Copyright (C) 2003 by Zhang Le <[email protected]>
* Begin : 01-Jun-2003
* Last Change : 17-Nov-2004.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/

#ifndef LBFGSTRAINER_H
#define LBFGSTRAINER_H

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "trainer.hpp"

namespace maxent{
class LBFGSTrainer : public Trainer {
public:
void train(size_t iter = 100, double eps = 1E-05);
// void save_param(const string& model, bool binary) const;

private:
// void save_param_txt(const string& model) const;
void init_trainer();
double heldout_accuracy() const;

#if !defined(_STLPORT_VERSION) && defined(_MSC_VER) && (_MSC_VER >= 1300)
// for MSVC7's hash_map declaration
class featid_hasher : public stdext::hash_compare<pair<size_t, size_t> > {
public:
size_t operator()(const pair<size_t, size_t>& p) const {
return p.first + p.second;
}

bool operator()(const pair<size_t, size_t>& k1,
const pair<size_t, size_t>& k2) const {
return k1 < k2;
}
};
#else
// for hash_map of GCC & STLPORT
struct featid_hasher {
size_t operator()(const pair<size_t, size_t>& p) const {
return p.first + p.second;
}
};

#endif

shared_array<double> m_observed_expects;
};

} // namespace maxent

#endif /* ifndef LBFGSTRAINER_H */
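
For context, a minimal sketch of how this trainer is reached from user code.
It assumes the toolkit's public MaxentModel interface (begin_add_event /
add_event / end_add_event / train / save) declared in maxentmodel.hpp, which
is not part of this commit, so treat the exact names and signatures as
assumptions rather than a definitive reference:

    #include <string>
    #include <vector>
    #include "maxentmodel.hpp"

    int main() {
        maxent::MaxentModel m;
        m.begin_add_event();
        // one training event: a context (a list of predicate strings)
        // paired with the observed outcome
        std::vector<std::string> context;
        context.push_back("prevword=the");
        context.push_back("suffix=ing");
        m.add_event(context, "VBG");
        m.end_add_event();
        // "lbfgs" selects LBFGSTrainer; sigma = 0.0 disables the Gaussian
        // prior (the "Gaussian Penalty: off" branch above)
        m.train(100, "lbfgs", 0.0, 1E-05);
        m.save("model.txt");
        return 0;
    }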
