Skip to content

Commit

Permalink
copy task finally working!
Browse files Browse the repository at this point in the history
  • Loading branch information
fumin committed Feb 17, 2015
1 parent 999f6a0 commit dcc85b9
Show file tree
Hide file tree
Showing 14 changed files with 39,638 additions and 183 deletions.
15 changes: 4 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,14 +1,7 @@
SCP=scp -i ~/cardinalblue/cbauthenticator/config/certs/aws_ec2_piccollage.pem
REMOTE[email protected]:/home/ec2-user/gopath/src/github.com/fumin/ntm

prof:
go get github.com/fumin/ntm/copy
${GOPATH}/bin/copy -cpuprofile=copy.prof
go tool pprof ${GOPATH}/bin/copy copy.prof

scp:
${SCP} Makefile ntm.go math.go ${REMOTE}/
${SCP} copy/main.go ${REMOTE}/copy/
go get github.com/fumin/ntm/copytask/train
${GOPATH}/bin/train -cpuprofile=train.prof
go tool pprof ${GOPATH}/bin/train train.prof

clean:
rm -f copy.prof
rm -f train.prof
63 changes: 37 additions & 26 deletions addressing.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,13 @@ func NewSimilarity(u, v []Unit) *Similarity {
}

func (s *Similarity) Backward() {
for i := 0; i < len(s.U); i++ {
s.U[i].Grad += (s.V[i].Val - s.UV*s.U[i].Val/(s.Unorm*s.Unorm)) / (s.Unorm * s.Vnorm) * s.Top.Grad
s.V[i].Grad += (s.U[i].Val - s.UV*s.V[i].Val/(s.Vnorm*s.Vnorm)) / (s.Unorm * s.Vnorm) * s.Top.Grad
uvuu := s.UV / (s.Unorm * s.Unorm)
uvvv := s.UV / (s.Vnorm * s.Vnorm)
uvg := s.Top.Grad / (s.Unorm * s.Vnorm)
for i, u := range s.U {
v := s.V[i].Val
s.U[i].Grad += (v - u.Val*uvuu) * uvg
s.V[i].Grad += (u.Val - v*uvvv) * uvg
}
}

Expand Down Expand Up @@ -76,35 +80,30 @@ func NewContentAddressing(units []*BetaSimilarity) *ContentAddressing {
Units: units,
Top: make([]Unit, len(units)),
}
// Increase numerical stability by subtracting all weights by their max,
// before computing math.Exp().
var max float64 = -1
for _, u := range s.Units {
max = math.Max(max, u.Top.Val)
}
var sum float64 = 0
for i := 0; i < len(s.Top); i++ {
s.Top[i].Val = math.Exp(s.Units[i].Top.Val)
s.Top[i].Val = math.Exp(s.Units[i].Top.Val - max)
sum += s.Top[i].Val
}
for i := 0; i < len(s.Top); i++ {
s.Top[i].Val = s.Top[i].Val / sum
if math.IsNaN(s.Top[i].Val) {
us := ""
for i := range s.Units {
us = fmt.Sprintf("%s, %f, %f, %+v", us, s.Units[i].Beta.Val, s.Units[i].b, s.Units[i].Top)
}
panic(fmt.Sprintf("us: %s", us))
}
}
return &s
}

func (s *ContentAddressing) Backward() {
for i := 0; i < len(s.Units); i++ {
var grad float64 = 0
for j := 0; j < len(s.Top); j++ {
if j == i {
grad += s.Top[j].Grad * (1 - s.Top[j].Val)
} else {
grad -= s.Top[j].Grad * s.Top[j].Val
}
}
s.Units[i].Top.Grad += grad * s.Top[i].Val
var gv float64 = 0
for _, top := range s.Top {
gv += top.Grad * top.Val
}
for i, top := range s.Top {
s.Units[i].Top.Grad += (top.Grad - gv) * top.Val
}
}

Expand Down Expand Up @@ -166,7 +165,11 @@ func NewShiftedWeighting(s *Unit, wg *GatedWeighting) *ShiftedWeighting {
//if sw.Z < 0 {
// sw.Z += float64(n)
//}
sw.Z = float64(n) * Sigmoid(s.Val)

//sw.Z = float64(n) * Sigmoid(s.Val)
shift := (2*Sigmoid(s.Val) - 1) // * maxShift
sw.Z = math.Mod(shift+float64(n), float64(n))

simj := 1 - (sw.Z - math.Floor(sw.Z))
for i := 0; i < len(sw.Top); i++ {
imj := (i + int(sw.Z)) % n
Expand All @@ -186,7 +189,9 @@ func (sw *ShiftedWeighting) Backward() {
imj := (i + int(sw.Z)) % n
grad += (-sw.WG.Top[imj].Val + sw.WG.Top[(imj+1)%n].Val) * sw.Top[i].Grad
}
grad = grad * sw.Z * (1 - sw.Z/float64(n))
sig := Sigmoid(sw.S.Val)
grad = grad * 2 * sig * (1 - sig)
// grad = grad * sw.Z * (1 - sw.Z/float64(n))
sw.S.Grad += grad

simj := 1 - (sw.Z - math.Floor(sw.Z))
Expand Down Expand Up @@ -228,6 +233,9 @@ func NewRefocus(gamma *Unit, sw *ShiftedWeighting) *Refocus {

func (rf *Refocus) Backward() {
for i := 0; i < len(rf.SW.Top); i++ {
if rf.SW.Top[i].Val < machineEpsilon {
continue
}
var grad float64 = 0
for j := 0; j < len(rf.Top); j++ {
if j == i {
Expand All @@ -244,6 +252,9 @@ func (rf *Refocus) Backward() {
var lnexp float64 = 0
var s float64 = 0
for i := 0; i < len(lns); i++ {
if rf.SW.Top[i].Val < machineEpsilon {
continue
}
lns[i] = math.Log(rf.SW.Top[i].Val)
pow := math.Pow(rf.SW.Top[i].Val, rf.g)
lnexp += lns[i] * pow
Expand All @@ -252,6 +263,9 @@ func (rf *Refocus) Backward() {
lnexps := lnexp / s
var grad float64 = 0
for i := 0; i < len(rf.Top); i++ {
if rf.SW.Top[i].Val < machineEpsilon {
continue
}
grad += rf.Top[i].Grad * (rf.Top[i].Val * (lns[i] - lnexps))
}
grad = grad / (1 + math.Exp(-rf.Gamma.Val))
Expand Down Expand Up @@ -352,7 +366,6 @@ func (wm *WrittenMemory) Backward() {
var grad float64 = 0
for k := 0; k < len(wm.Top[j]); k++ {
e := wm.erase[i][k]
//gErase := wm.mTilt[j][k] / (1 - wm.Ws[i].Top[j].Val*e) * (-e)
gErase := wm.Mtm1.Top[j][k].Val * (-e)
for q := 0; q < len(wm.Ws); q++ {
if q == i {
Expand All @@ -372,7 +385,6 @@ func (wm *WrittenMemory) Backward() {
for i := 0; i < len(h.EraseVector()); i++ {
var grad float64 = 0
for j := 0; j < len(wm.Top); j++ {
//grad += wm.Top[j][i].Grad * wm.mTilt[j][i] / (1 - wm.Ws[k].Top[j].Val*wm.erase[k][i]) * (-wm.Ws[k].Top[j].Val)
gErase := wm.Mtm1.Top[j][i].Val
for q := 0; q < len(wm.Ws); q++ {
if q == k {
Expand Down Expand Up @@ -400,7 +412,6 @@ func (wm *WrittenMemory) Backward() {
// Gradient of wm.Mtm1
for i := 0; i < len(wm.Mtm1.Top); i++ {
for j := 0; j < len(wm.Mtm1.Top[i]); j++ {
//wm.Mtm1.Top[i][j].Grad += (wm.mTilt[i][j] / wm.Mtm1.Top[i][j].Val) * wm.Top[i][j].Grad
var grad float64 = 1
for q := 0; q < len(wm.Ws); q++ {
grad = grad * (1 - wm.Ws[q].Top[i].Val*wm.erase[q][j])
Expand Down
2 changes: 1 addition & 1 deletion addressing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func doAddressing(heads []*Head, memory [][]Unit) (weights [][]float64, reads []
//if s < 0 {
// s += float64(n)
//}
s := float64(n) * Sigmoid(h.S().Val)
s := math.Mod((2*Sigmoid(h.S().Val)-1)+float64(n), float64(n))
for j := 0; j < n; j++ {
imj := (j + int(s)) % n
simj := 1 - (s - math.Floor(s))
Expand Down
4 changes: 2 additions & 2 deletions cntl1_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func TestController1(t *testing.T) {
numHeads := 2
c := NewEmptyController1(len(x[0]), len(y[0]), h1Size, numHeads, n, m)
c.Weights(func(u *Unit) { u.Val = 2 * rand.Float64() })
forwardBackward(c, x, y)
ForwardBackward(c, x, y)

l := loss(c, Controller1Forward, x, y)
checkGradients(t, c, Controller1Forward, x, y, l)
Expand Down Expand Up @@ -76,7 +76,7 @@ func Controller1Forward(c1 Controller, reads [][]float64, x []float64) ([]float6
}

func loss(c Controller, forward func(Controller, [][]float64, []float64) ([]float64, []*Head), in, out [][]float64) float64 {
// Initialize memory as in the function forwardBackward
// Initialize memory as in the function ForwardBackward
mem := c.Mtm1BiasV().Top
wtm1Bs := c.Wtm1BiasV()
wtm1s := make([]*Refocus, c.NumHeads())
Expand Down
122 changes: 0 additions & 122 deletions copy/main.go

This file was deleted.

41 changes: 41 additions & 0 deletions copytask/copytask.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package copytask

import (
"math/rand"
)

// GenSeq generates one random instance of the NTM copy task.
//
// It returns (input, output), each with size*2+2 rows:
//
//   - input rows have width vectorSize+2; the last two columns are
//     control channels. Row 0 raises the start-of-input marker
//     (column vectorSize), rows 1..size carry the random binary
//     payload, and row size+1 raises the end-of-input marker
//     (column vectorSize+1). The remaining rows are all zero.
//   - output rows have width vectorSize; rows size+2..2*size+1 repeat
//     the payload (the sequence the network must reproduce), and all
//     earlier rows are zero.
func GenSeq(size, vectorSize int) ([][]float64, [][]float64) {
	// Random binary payload the network must memorize and echo back.
	data := make([][]float64, size)
	for i := range data {
		data[i] = make([]float64, vectorSize)
		for j := range data[i] {
			data[i][j] = float64(rand.Intn(2))
		}
	}

	input := make([][]float64, size*2+2)
	for i := range input {
		input[i] = make([]float64, vectorSize+2)
		switch {
		case i == 0:
			input[i][vectorSize] = 1 // start-of-sequence marker
		case i <= size:
			// copy fills only the first vectorSize columns; the two
			// control columns stay zero during data presentation.
			copy(input[i], data[i-1])
		case i == size+1:
			input[i][vectorSize+1] = 1 // end-of-sequence marker
		}
	}

	output := make([][]float64, size*2+2)
	for i := range output {
		output[i] = make([]float64, vectorSize)
		if i >= size+2 {
			copy(output[i], data[i-(size+2)])
		}
	}

	return input, output
}
Loading

0 comments on commit dcc85b9

Please sign in to comment.