Commit 16f0990

issue: implement OWLQN and LBFGS and pass the unittests
issue: rename lbfgs_minimizer_test.go to minimizer_test.go
seaglex authored and xlvector committed Dec 30, 2013
1 parent 2eef907 commit 16f0990
Showing 6 changed files with 260 additions and 30 deletions.
7 changes: 7 additions & 0 deletions diff_function.go
@@ -0,0 +1,7 @@
package hector

// Description: differentiable objective function for minimizers such as LBFGS and OWLQN
type DiffFunction interface {
Value(pos *Vector) float64
Gradient(pos *Vector) *Vector
}
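
For orientation, a minimal sketch of a custom objective implementing the interface above: a separable quadratic 0.5 * sum(x_i^2) written against the same package. The sumSquares type is illustrative only and is not part of this commit; it relies only on the Vector fields and methods visible in this diff.

// Sketch only: 0.5 * sum(x_i^2) as a DiffFunction (hypothetical type).
type sumSquares struct{}

func (f *sumSquares) Value(pos *Vector) float64 {
	cost := 0.0
	for _, val := range pos.data {
		cost += 0.5 * val * val
	}
	return cost
}

func (f *sumSquares) Gradient(pos *Vector) *Vector {
	// The gradient of 0.5 * x^2 is x itself; Copy keeps the caller's vector untouched.
	return pos.Copy()
}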
53 changes: 34 additions & 19 deletions lbfgs_minimizer.go
@@ -2,42 +2,57 @@ package hector

import ("fmt")

const numHist int = 10
const maxIteration int = 200

/**
* Based on the paper "Scalable Training of L1-Regularized Log-Linear Models"
* by Galen Andrew and Jianfeng Gao.
* Author: weixuan
*/
type LBFGSMinimizer struct {
costFun DiffFunction
numHist int
maxIteration int
tolerance float64
}

type DiffFunction interface {
Value(pos *Vector) float64
Gradient(pos *Vector) *Vector
func NewLBFGSMinimizer() (*LBFGSMinimizer) {
m := new(LBFGSMinimizer)
m.numHist = 10
m.maxIteration = 200
m.tolerance = 1e-4
return m
}

func (minimizer *LBFGSMinimizer) Minimize(costfun DiffFunction, init *Vector) *Vector {
minimizer.costFun = costfun;
func (m *LBFGSMinimizer) Minimize(costfun DiffFunction, init *Vector) *Vector {
m.costFun = costfun;
var cost float64 = costfun.Value(init)
var grad *Vector = costfun.Gradient(init).Copy()
var pos *Vector = init
var pos *Vector = init.Copy()
var terminalCriterion *relativeMeanImprCriterion = NewRelativeMeanImprCriterion(m.tolerance)
terminalCriterion.addCost(cost)

var helper *QuasiNewtonHelper = NewQuasiNewtonHelper(numHist, minimizer, pos, grad)
var helper *QuasiNewtonHelper = NewQuasiNewtonHelper(m.numHist, m, pos, grad)
fmt.Println("Iter\tcost\timprovement")
fmt.Printf("%d\t%eN/A\n", 0, cost)
for iter:=1; iter <= maxIteration; iter++ {
fmt.Printf("%d\t%e\tUndefined\n", 0, cost)
for iter:=1; iter <= m.maxIteration; iter++ {
dir := grad.Copy()
dir.ApplyScale(-1.0)
helper.ApplyQuasiInverseHession(dir)
newCost, newPos := helper.BackTrackingLineSearch(cost, pos, grad, dir, iter==1)
if cost <= newCost {
break
}
fmt.Printf("%d\t%e\t%e\n", iter, newCost, (cost-newCost)/cost)
if (cost-newCost)/cost <= 0.0001 {
if cost == newCost {
break
}
cost = newCost
pos = newPos
terminalCriterion.addCost(cost)
fmt.Printf("%d\t%e\t%e\n", iter, newCost, terminalCriterion.improvement)
if terminalCriterion.isTerminable() {
break
}
grad = costfun.Gradient(pos).Copy()
if helper.UpdateState(pos, grad) {
break
}
}
return pos
}
@@ -47,5 +62,5 @@ func (m *LBFGSMinimizer) Evaluate(pos *Vector) float64 {
}

func (m *LBFGSMinimizer) NextPoint(curPos *Vector, dir *Vector, alpha float64) *Vector {
return curPos.ElemWiseMultiplyAdd(dir, alpha)
}
return curPos.ElemWiseMultiplyAdd(dir, alpha)
}
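
A usage sketch of the constructor added above. The exampleLBFGS wrapper, costFun, and init are hypothetical names for illustration; the defaults come from NewLBFGSMinimizer (numHist=10, maxIteration=200, tolerance=1e-4).

// Sketch only: minimize a user-supplied DiffFunction from a starting point.
func exampleLBFGS(costFun DiffFunction, init *Vector) *Vector {
	minimizer := NewLBFGSMinimizer()
	return minimizer.Minimize(costFun, init)
}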
25 changes: 16 additions & 9 deletions lbfgs_minimizer_test.go → minimizer_test.go
@@ -15,20 +15,20 @@ type mseDiffFunction struct {

func getMSECostFunction() *mseDiffFunction{
f := new(mseDiffFunction)
f.center.data = map[int64]float64 {0:0, 1:1}
f.center.data = map[int64]float64 {0:0, 1:0}
f.weights.data = map[int64]float64 {0:1, 1:0.01}
f.init.data = map[int64]float64 {0:1, 1:1}
f.grad.data = map[int64]float64 {0:0, 1:0}
return f
}

func (f *mseDiffFunction) Value(x *Vector) float64 {
var val float64 = 0
var cost float64 = 0
for n, val := range x.data {
diff := val - f.center.GetValue(n)
val += f.weights.GetValue(n) * diff * diff
cost += f.weights.GetValue(n) * diff * diff
}
return 0.5 * val
return 0.5 * cost
}

// Note: Gradient reuses the same underlying vector across calls, so results for different points share memory
@@ -37,23 +37,30 @@ func (f *mseDiffFunction) Gradient(x *Vector) *Vector {
f.grad.SetValue(n, f.weights.GetValue(n) * (val - f.center.GetValue(n)))
}
return &f.grad
}
}

func (f *mseDiffFunction) testResult(result *Vector, tolerance float64, t *testing.T) {
fmt.Println("Index\tTrue\tResult")
for n, val := range result.data {
for n, val := range f.center.data {
fmt.Printf("%d\t%e\t%e\n", n, val, result.GetValue(n))
}
for n, val := range result.data {
if math.Abs(val - f.center.GetValue(n)) <= tolerance {
t.Errorf("Mismatch\nIndex\tTrue\tResult\n%d\t%e\t%e", n, f.center.GetValue(n), val)
if math.Abs(val - f.center.GetValue(n)) > tolerance {
t.Errorf("Mismatch\nIndex\tTrue\tResult\n%d\t%e\t%e\n", n, f.center.GetValue(n), val)
}
}
}

func TestLBFGS(t *testing.T) {
diffFunc := getMSECostFunction()
minimizer := new(LBFGSMinimizer)
minimizer := NewLBFGSMinimizer()
result := minimizer.Minimize(diffFunc, &(diffFunc.init))
diffFunc.testResult(result, 1e-6, t)
}

func TestOWLQN(t *testing.T) {
diffFunc := getMSECostFunction()
minimizer := NewOWLQNMinimizer(0.001)
result := minimizer.Minimize(diffFunc, &(diffFunc.init))
diffFunc.testResult(result, 0, t)
}
143 changes: 143 additions & 0 deletions owlqn_minimizer.go
@@ -0,0 +1,143 @@
package hector

import ("fmt"
"math")

/**
* Based on the paper "Scalable Training of L1-Regularized Log-Linear Models"
* by Galen Andrew and Jianfeng Gao.
* Author: weixuan
*/
type OWLQNMinimizer struct {
l1reg float64
costFun DiffFunction
numHist int
maxIteration int
tolerance float64
}

func NewOWLQNMinimizer(l1reg float64) *OWLQNMinimizer {
m := new(OWLQNMinimizer)
m.l1reg = l1reg
m.numHist = 10
m.maxIteration = 20
m.tolerance = 1e-4
return m
}

func (m *OWLQNMinimizer) Minimize(costfun DiffFunction, init *Vector) *Vector {
m.costFun = costfun;
var cost float64 = m.Evaluate(init)
var grad *Vector = costfun.Gradient(init).Copy()
var pos *Vector = init.Copy()
var terminalCriterion *relativeMeanImprCriterion = NewRelativeMeanImprCriterion(m.tolerance)
terminalCriterion.addCost(cost)

var helper *QuasiNewtonHelper = NewQuasiNewtonHelper(m.numHist, m, pos, grad)
fmt.Println("Iter\tcost\timprovement")
fmt.Printf("%d\t%e\tUndefined\n", 0, cost)
for iter:=1; iter <= m.maxIteration; iter++ {
// customized steepest-descent direction: the negative pseudo-gradient, which folds in the L1 term
steepestDescDir := grad.Copy()
m.updateGrad(pos, steepestDescDir)
steepestDescDir.ApplyScale(-1.0)
dir := steepestDescDir.Copy()
// quasi-Newton direction
helper.ApplyQuasiInverseHession(dir)
m.fixDirSign(dir, steepestDescDir)
// customized gradient for the prospective new position, passed to the line search
potentialGrad := grad.Copy()
m.updateGradForNewPos(pos, potentialGrad, dir)
newCost, newPos := helper.BackTrackingLineSearch(cost, pos, potentialGrad, dir, iter==1)
if cost == newCost {
break
}
cost = newCost
pos = newPos
terminalCriterion.addCost(cost)
fmt.Printf("%d\t%e\t%e\n", iter, newCost, terminalCriterion.improvement)
if terminalCriterion.isTerminable() {
break
}
grad = costfun.Gradient(pos).Copy()
if helper.UpdateState(pos, grad) {
break
}
}
return pos
}

func (m *OWLQNMinimizer) updateGradForNewPos(x *Vector, grad *Vector, dir *Vector) {
if m.l1reg == 0 {
return
}
for key, val := range grad.data {
xval := x.GetValue(key)
if xval < 0 {
grad.SetValue(key, val - m.l1reg)
} else if xval > 0 {
grad.SetValue(key, val + m.l1reg)
} else {
dirval := dir.GetValue(key)
if dirval < 0 {
grad.SetValue(key, val - m.l1reg)
} else if dirval > 0 {
grad.SetValue(key, val + m.l1reg)
}
}
}
return
}

func (m *OWLQNMinimizer) updateGrad(x *Vector, grad *Vector) {
if m.l1reg == 0 {
return
}
for key, val := range grad.data {
xval := x.GetValue(key)
if xval < 0 {
grad.SetValue(key, val - m.l1reg)
} else if xval > 0 {
grad.SetValue(key, val + m.l1reg)
} else {
if val < -m.l1reg {
grad.SetValue(key, val + m.l1reg)
} else if val > m.l1reg {
grad.SetValue(key, val - m.l1reg)
}
}
}
return
}

func (m *OWLQNMinimizer) fixDirSign(dir *Vector, steepestDescDir *Vector) {
if m.l1reg == 0 {
return
}
for key, val := range dir.data {
if val * steepestDescDir.GetValue(key) <= 0 {
dir.SetValue(key, 0)
}
}
}

func (m *OWLQNMinimizer) Evaluate(pos *Vector) float64 {
cost := m.costFun.Value(pos)
for _, val := range pos.data {
cost += math.Abs(val) * m.l1reg
}
return cost
}

func (m *OWLQNMinimizer) NextPoint(curPos *Vector, dir *Vector, alpha float64) *Vector {
newPos := curPos.ElemWiseMultiplyAdd(dir, alpha)
if m.l1reg > 0 {
for key, val := range curPos.data {
if val * newPos.GetValue(key) < 0 {
newPos.SetValue(key, 0)
}
}
}
return newPos
}
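
A usage sketch of the OWLQN entry point above. The exampleOWLQN wrapper, costFun, and init are hypothetical names; the L1 weight 0.001 matches the value used in TestOWLQN. With l1reg = 0 the pseudo-gradient, sign fix, and orthant projection above become no-ops and the minimizer reduces to plain LBFGS on the smooth cost.

// Sketch only: L1-regularized minimization of a user-supplied DiffFunction.
func exampleOWLQN(costFun DiffFunction, init *Vector) *Vector {
	return NewOWLQNMinimizer(0.001).Minimize(costFun, init)
}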
6 changes: 4 additions & 2 deletions quasinewton_helper.go
@@ -26,7 +26,8 @@ type Minimizer interface {
}

const MAX_BACKTRACKING_ITER = 50


// Description: the pos and gradient arguments must NOT be modified by the caller afterwards
func NewQuasiNewtonHelper(numHist int, minimizer Minimizer, curPos *Vector, curGrad *Vector) (*QuasiNewtonHelper) {
h := new(QuasiNewtonHelper)
h.numHist = int64(numHist)
@@ -92,7 +93,8 @@ func (h *QuasiNewtonHelper) BackTrackingLineSearch(cost float64, pos *Vector, gr
return nextCost, nextPos
}

func (h *QuasiNewtonHelper) updateState(nextPos *Vector, nextGrad *Vector) (isOptimal bool) {
// Description: the pos and gradient arguments must NOT be modified by the caller afterwards
func (h *QuasiNewtonHelper) UpdateState(nextPos *Vector, nextGrad *Vector) (isOptimal bool) {
if int64(len(h.sList)) >= h.numHist {
h.sList = h.sList[1:]
h.yList = h.yList[1:]
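
Both minimizers in this commit are passed as the second argument of NewQuasiNewtonHelper, so a compile-time interface check is a cheap way to document that contract. The blank-identifier assertions below are illustrative and not part of the commit.

// Sketch only: assert at compile time that both minimizers satisfy the
// Minimizer interface expected by NewQuasiNewtonHelper.
var _ Minimizer = (*LBFGSMinimizer)(nil)
var _ Minimizer = (*OWLQNMinimizer)(nil)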
56 changes: 56 additions & 0 deletions terminal_criterion.go
@@ -0,0 +1,56 @@
package hector

import ("math")

/**
* Based on the paper "Scalable Training of L1-Regularized Log-Linear Models"
* by Galen Andrew and Jianfeng Gao.
* Author: weixuan
*/
type relativeMeanImprCriterion struct {
minHist int
maxHist int
tolerance float64
improvement float64
costList []float64
}

func NewRelativeMeanImprCriterion(tolerance float64) *relativeMeanImprCriterion {
tc := new(relativeMeanImprCriterion)
tc.minHist = 5
tc.maxHist = 10
tc.costList = make([]float64, 0, tc.maxHist)
tc.tolerance = tolerance
return tc
}

func (tc *relativeMeanImprCriterion) calImprovement() float64{
sz := len(tc.costList)
if sz <= tc.minHist {
return math.MaxFloat32
}
first := tc.costList[0]
last := tc.costList[sz-1]
impr := (first - last) /float64(sz-1)
if last != 0 {
impr = math.Abs(impr / last)
} else if first != 0 {
impr = math.Abs(impr / first)
} else {
impr = 0
}
if sz > tc.maxHist {
tc.costList = tc.costList[1:]
}
return impr
}

func (tc *relativeMeanImprCriterion) addCost(latestCost float64) {
tc.costList = append(tc.costList, latestCost)
tc.improvement = tc.calImprovement()
}

func (tc *relativeMeanImprCriterion) isTerminable() bool {
return tc.improvement <= tc.tolerance
}
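
The criterion averages the cost drop over the stored window and normalizes it by the latest cost, and it only kicks in once more than minHist costs have been recorded. A worked example with hypothetical numbers: after the six costs 10, 9, 8.5, 8.2, 8.05, 8.0 the improvement is (10 - 8.0) / 5 / 8.0 = 0.05, far above the 1e-4 tolerance both minimizers pass in, so training would continue. The exampleCriterion helper is illustrative only.

// Sketch only: feed six hypothetical costs and inspect the criterion.
func exampleCriterion() (float64, bool) {
	tc := NewRelativeMeanImprCriterion(1e-4)
	for _, c := range []float64{10, 9, 8.5, 8.2, 8.05, 8.0} {
		tc.addCost(c)
	}
	// improvement = (10 - 8.0) / 5 / 8.0 = 0.05, so isTerminable() is still false.
	return tc.improvement, tc.isTerminable()
}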
