Skip to content

Commit

Permalink
optimize without changing any subtle floating point precision issues
Browse files Browse the repository at this point in the history
  • Loading branch information
fumin committed Feb 16, 2015
1 parent 0707d69 commit 999f6a0
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 89 deletions.
10 changes: 5 additions & 5 deletions addressing.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,15 +122,15 @@ func NewGatedWeighting(g *Unit, wc *ContentAddressing, wtm1 *Refocus) *GatedWeig
Wtm1: wtm1,
Top: make([]Unit, len(wc.Top)),
}
gt := sigmoid(g.Val)
gt := Sigmoid(g.Val)
for i := 0; i < len(wg.Top); i++ {
wg.Top[i].Val = gt*wc.Top[i].Val + (1-gt)*wtm1.Top[i].Val
}
return &wg
}

func (wg *GatedWeighting) Backward() {
gt := sigmoid(wg.G.Val)
gt := Sigmoid(wg.G.Val)

var grad float64 = 0
for i := 0; i < len(wg.Top); i++ {
Expand Down Expand Up @@ -166,7 +166,7 @@ func NewShiftedWeighting(s *Unit, wg *GatedWeighting) *ShiftedWeighting {
//if sw.Z < 0 {
// sw.Z += float64(n)
//}
sw.Z = float64(n) * sigmoid(s.Val)
sw.Z = float64(n) * Sigmoid(s.Val)
simj := 1 - (sw.Z - math.Floor(sw.Z))
for i := 0; i < len(sw.Top); i++ {
imj := (i + int(sw.Z)) % n
Expand Down Expand Up @@ -325,8 +325,8 @@ func NewWrittenMemory(ws []*Refocus, heads []*Head, mtm1 *WrittenMemory) *Writte
eraseVec := wm.Heads[i].EraseVector()
addVec := wm.Heads[i].AddVector()
for j := 0; j < len(wm.erase[i]); j++ {
wm.erase[i][j] = sigmoid(eraseVec[j].Val)
wm.add[i][j] = sigmoid(addVec[j].Val)
wm.erase[i][j] = Sigmoid(eraseVec[j].Val)
wm.add[i][j] = Sigmoid(addVec[j].Val)
}
}

Expand Down
8 changes: 4 additions & 4 deletions addressing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ func doAddressing(heads []*Head, memory [][]Unit) (weights [][]float64, reads []
}

// Content-based, location-based addressing gate
g := sigmoid(h.G().Val)
g := Sigmoid(h.G().Val)
for j := 0; j < len(wc); j++ {
wc[j] = g*wc[j] + (1-g)*h.Wtm1.Top[j].Val
}
Expand All @@ -100,7 +100,7 @@ func doAddressing(heads []*Head, memory [][]Unit) (weights [][]float64, reads []
//if s < 0 {
// s += float64(n)
//}
s := float64(n) * sigmoid(h.S().Val)
s := float64(n) * Sigmoid(h.S().Val)
for j := 0; j < n; j++ {
imj := (j + int(s)) % n
simj := 1 - (s - math.Floor(s))
Expand Down Expand Up @@ -134,11 +134,11 @@ func doAddressing(heads []*Head, memory [][]Unit) (weights [][]float64, reads []
for k := 0; k < len(heads); k++ {
eraseVec := heads[k].EraseVector()
for i := 0; i < len(erase[k]); i++ {
erase[k][i] = sigmoid(eraseVec[i].Val)
erase[k][i] = Sigmoid(eraseVec[i].Val)
}
addVec := heads[k].AddVector()
for i := 0; i < len(add[k]); i++ {
add[k][i] = sigmoid(addVec[i].Val)
add[k][i] = Sigmoid(addVec[i].Val)
}
}
newMem = MakeTensor2(len(memory), len(memory[0]))
Expand Down
141 changes: 88 additions & 53 deletions cntl1.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,94 +68,109 @@ func (old *Controller1) Forward(reads []*Read, x []float64) Controller {
heads: make([]*Head, len(reads)),
}

for i := 0; i < len(c.H1); i++ {
var v float64 = 0
for j := 0; j < len(reads); j++ {
for k := 0; k < len(reads[j].Top); k++ {
v += c.Wh1r[i][j][k].Val * reads[j].Top[k].Val
var v float64
for i, wh1ri := range c.Wh1r {
wh1xi := c.Wh1x[i]
v = 0
for j, wh1rij := range wh1ri {
read := reads[j]
for k, wh1rijk := range wh1rij {
v += wh1rijk.Val * read.Top[k].Val
}
}
for j := 0; j < len(x); j++ {
v += c.Wh1x[i][j].Val * x[j]
for j, wh1xij := range wh1xi {
v += wh1xij.Val * x[j]
}
v += c.Wh1b[i].Val
c.H1[i].Val = sigmoid(v)
c.H1[i].Val = Sigmoid(v)
}

for i := 0; i < len(c.y); i++ {
var v float64 = 0
for j := 0; j < len(c.H1); j++ {
v += c.Wyh1[i][j].Val * c.H1[j].Val
for i, wyh1i := range c.Wyh1 {
v = 0
for j, wyh1ij := range wyh1i[0:len(c.H1)] {
v += wyh1ij.Val * c.H1[j].Val
}
v += c.Wyh1[i][len(c.H1)].Val
c.y[i].Val = sigmoid(v)
c.y[i].Val = Sigmoid(v)
}
memoryM := len(reads[0].Top)
for i := 0; i < len(c.heads); i++ {
for i, wuh1i := range c.Wuh1 {
c.heads[i] = NewHead(memoryM)
for j := 0; j < len(c.heads[i].units); j++ {
maxK := len(c.Wuh1[i][j]) - 1
for k := 0; k < maxK; k++ {
c.heads[i].units[j].Val += c.Wuh1[i][j][k].Val * c.H1[k].Val
head := c.heads[i]
for j, wuh1ij := range wuh1i {
v = 0
for k, wuh1ijk := range wuh1ij[0:len(c.H1)] {
v += wuh1ijk.Val * c.H1[k].Val
}
c.heads[i].units[j].Val += c.Wuh1[i][j][maxK].Val
v += wuh1ij[len(c.H1)].Val
head.units[j].Val += v
}
}

return &c
}

func (c *Controller1) Backward() {
for i := 0; i < len(c.H1); i++ {
var grad float64 = 0
for j := 0; j < len(c.y); j++ {
grad += c.y[j].Grad * c.Wyh1[j][i].Val
}
for j := 0; j < len(c.heads); j++ {
for k := 0; k < len(c.heads[j].units); k++ {
grad += c.heads[j].units[k].Grad * c.Wuh1[j][k][i].Val
for j, y := range c.y {
for i, wyh1 := range c.Wyh1[j][0:len(c.H1)] {
c.H1[i].Grad += wyh1.Val * y.Grad
}
}
for j, head := range c.heads {
wuh1j := c.Wuh1[j]
for k, h := range head.units {
for i, wuh1jki := range wuh1j[k][0:len(c.H1)] {
c.H1[i].Grad += h.Grad * wuh1jki.Val
}
}
c.H1[i].Grad += grad
}
for i := 0; i < len(c.Wyh1); i++ {
maxJ := len(c.Wyh1[i]) - 1
for j := 0; j < maxJ; j++ {
c.Wyh1[i][j].Grad += c.y[i].Grad * c.H1[j].Val
for i, wyh1i := range c.Wyh1 {
yGrad := c.y[i].Grad
for j, h1 := range c.H1 {
wyh1i[j].Grad += yGrad * h1.Val
}
c.Wyh1[i][maxJ].Grad += c.y[i].Grad
wyh1i[len(wyh1i)-1].Grad += yGrad
}
for i := 0; i < len(c.Wuh1); i++ {
for j := 0; j < len(c.Wuh1[i]); j++ {
maxK := len(c.Wuh1[i][j]) - 1
for k := 0; k < maxK; k++ {
c.Wuh1[i][j][k].Grad += c.heads[i].units[j].Grad * c.H1[k].Val
for i, wuh1i := range c.Wuh1 {
for j, head := range c.heads[i].units {
wuh1ij := wuh1i[j]
for k, h1 := range c.H1 {
wuh1ij[k].Grad += head.Grad * h1.Val
}
c.Wuh1[i][j][maxK].Grad += c.heads[i].units[j].Grad
wuh1ij[len(wuh1ij)-1].Grad += head.Grad
}
}

for i := 0; i < len(c.Reads); i++ {
for j := 0; j < len(c.Reads[i].Top); j++ {
for k := 0; k < len(c.H1); k++ {
c.Reads[i].Top[j].Grad += c.H1[k].Grad * c.H1[k].Val * (1 - c.H1[k].Val) * c.Wh1r[k][i][j].Val
h1Grads := make([]float64, len(c.H1))
for i, h1 := range c.H1 {
h1Grads[i] = h1.Grad * h1.Val * (1 - h1.Val)
}

for k, h1g := range h1Grads {
wh1rk := c.Wh1r[k]
for i, read := range c.Reads {
wh1rki := wh1rk[i]
for j, wh1rkij := range wh1rki {
read.Top[j].Grad += h1g * wh1rkij.Val
}
}
}
for i := 0; i < len(c.Wh1r); i++ {
for j := 0; j < len(c.Wh1r[i]); j++ {
for k := 0; k < len(c.Wh1r[i][j]); k++ {
c.Wh1r[i][j][k].Grad += c.H1[i].Grad * c.H1[i].Val * (1 - c.H1[i].Val) * c.Reads[j].Top[k].Val
for i, wh1ri := range c.Wh1r {
h1g := h1Grads[i]
for j, wh1rij := range wh1ri {
for k, read := range c.Reads[j].Top {
wh1rij[k].Grad += h1g * read.Val
}
}
}
for i := 0; i < len(c.Wh1x); i++ {
for j := 0; j < len(c.Wh1x[i]); j++ {
c.Wh1x[i][j].Grad += c.H1[i].Grad * c.H1[i].Val * (1 - c.H1[i].Val) * c.X[j]
for i, wh1xi := range c.Wh1x {
h1g := h1Grads[i]
for j, x := range c.X {
wh1xi[j].Grad += h1g * x
}
}
for i := 0; i < len(c.Wh1b); i++ {
c.Wh1b[i].Grad += c.H1[i].Grad * c.H1[i].Val * (1 - c.H1[i].Val)
for i, h1g := range h1Grads {
c.Wh1b[i].Grad += h1g
}
}

Expand All @@ -167,7 +182,27 @@ func (c *Controller1) Mtm1BiasV() *WrittenMemory {
return c.mtm1
}

func (c *Controller1) Weights(f func(string, *Unit)) {
func (c *Controller1) Weights(f func(*Unit)) {
for _, wtm1 := range c.wtm1s {
for _, w := range wtm1 {
f(&w.Top)
}
}
for _, row := range c.mtm1.Top {
for i := range row {
f(&row[i])
}
}
doUnit2(c.Wyh1, func(ids []int, u *Unit) { f(u) })
doUnit3(c.Wuh1, func(ids []int, u *Unit) { f(u) })
doUnit3(c.Wh1r, func(ids []int, u *Unit) { f(u) })
doUnit2(c.Wh1x, func(ids []int, u *Unit) { f(u) })
doUnit1(c.Wh1b, func(ids []int, u *Unit) { f(u) })
}

// WeightsVerbose is similar to Weights, but with additional information passed in.
// Avoid using this function except for debugging, as it calls fmt.Sprintf many times which is a performance hog.
func (c *Controller1) WeightsVerbose(f func(string, *Unit)) {
for i, wtm1 := range c.wtm1s {
for j, w := range wtm1 {
f(fmt.Sprintf("wtm1[%d][%d]", i, j), &w.Top)
Expand Down
12 changes: 6 additions & 6 deletions cntl1_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ import (

func TestController1(t *testing.T) {
times := 10
x := MakeTensor2(times, 2)
x := MakeTensor2(times, 4)
for i := 0; i < len(x); i++ {
for j := 0; j < len(x[i]); j++ {
x[i][j] = rand.Float64()
}
}
y := MakeTensor2(times, 2)
y := MakeTensor2(times, 4)
for i := 0; i < len(y); i++ {
for j := 0; j < len(y[i]); j++ {
y[i][j] = rand.Float64()
Expand All @@ -25,7 +25,7 @@ func TestController1(t *testing.T) {
h1Size := 3
numHeads := 2
c := NewEmptyController1(len(x[0]), len(y[0]), h1Size, numHeads, n, m)
c.Weights(func(tag string, u *Unit) { u.Val = 2 * rand.Float64() })
c.Weights(func(u *Unit) { u.Val = 2 * rand.Float64() })
forwardBackward(c, x, y)

l := loss(c, Controller1Forward, x, y)
Expand All @@ -47,7 +47,7 @@ func Controller1Forward(c1 Controller, reads [][]float64, x []float64) ([]float6
v += c.Wh1x[i][j].Val * x[j]
}
v += c.Wh1b[i].Val
h1[i] = sigmoid(v)
h1[i] = Sigmoid(v)
}
prediction := make([]float64, len(c.Wyh1))
for i := 0; i < len(prediction); i++ {
Expand All @@ -57,7 +57,7 @@ func Controller1Forward(c1 Controller, reads [][]float64, x []float64) ([]float6
v += c.Wyh1[i][j].Val * h1[j]
}
v += c.Wyh1[i][maxJ].Val
prediction[i] = sigmoid(v)
prediction[i] = Sigmoid(v)
}
numHeads := len(c.Wh1r[0])
m := len(c.Wh1r[0][0])
Expand Down Expand Up @@ -127,7 +127,7 @@ func loss(c Controller, forward func(Controller, [][]float64, []float64) ([]floa
}

func checkGradients(t *testing.T, c Controller, forward func(Controller, [][]float64, []float64) ([]float64, []*Head), in, out [][]float64, lx float64) {
c.Weights(func(tag string, w *Unit) {
c.WeightsVerbose(func(tag string, w *Unit) {
x := w.Val
h := machineEpsilonSqrt * math.Max(math.Abs(x), 1)
xph := x + h
Expand Down
31 changes: 15 additions & 16 deletions copy/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"flag"
"fmt"
"log"
"math"
"math/rand"
Expand Down Expand Up @@ -63,7 +62,7 @@ func main() {
defer pprof.StopCPUProfile()
}

var seed int64 = 4
var seed int64 = 2
rand.Seed(seed)
log.Printf("seed: %d", seed)

Expand All @@ -74,7 +73,7 @@ func main() {
m := 20
c := ntm.NewEmptyController1(vectorSize+2, vectorSize, h1Size, numHeads, n, m)
// Weights cannot be zero, or else we have division by zero in cosine similarity of content addressing.
c.Weights(func(tag string, u *ntm.Unit) { u.Val = 1 * (rand.Float64() - 0.5) })
c.Weights(func(u *ntm.Unit) { u.Val = 1 * (rand.Float64() - 0.5) })

//sgd := ntm.NewSGDMomentum(c)
rmsp := ntm.NewRMSProp(c)
Expand All @@ -89,19 +88,19 @@ func main() {
}

if i%1000 == 0 {
log.Printf("y: %+v", y)
pred := "prediction: "
for t := range y {
mymy := machines[t].Controller.Y()
pred += "["
for i := range y[t] {
pred = fmt.Sprintf("%s %.2f", pred, mymy[i].Val)
}
pred += "]"
}
log.Printf(pred)
h := machines[len(y)-3].Controller.Heads()[0]
log.Printf("beta: %f, g: %f, s: %f, gamma: %f, erase: %+v, add: %+v, k: %+v", h.Beta(), h.G(), h.S(), h.Gamma(), h.EraseVector(), h.AddVector(), h.K())
//log.Printf("y: %+v", y)
//pred := "prediction: "
//for t := range y {
// mymy := machines[t].Controller.Y()
// pred += "["
// for i := range y[t] {
// pred = fmt.Sprintf("%s %.2f", pred, mymy[i].Val)
// }
// pred += "]"
//}
//log.Printf(pred)
//h := machines[len(y)-3].Controller.Heads()[0]
//log.Printf("beta: %f, g: %f, s: %f, gamma: %f, erase: %+v, add: %+v, k: %+v", h.Beta(), h.G(), h.S(), h.Gamma(), h.EraseVector(), h.AddVector(), h.K())
}

}
Expand Down
2 changes: 1 addition & 1 deletion math.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const (
machineEpsilonSqrt = 1e-8 // math.Sqrt(machineEpsilon)
)

func sigmoid(x float64) float64 {
func Sigmoid(x float64) float64 {
return 1.0 / (1 + math.Exp(-x))
}

Expand Down
Loading

0 comments on commit 999f6a0

Please sign in to comment.