Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
xlvector committed Dec 3, 2013
2 parents d753b7d + 492c6da commit 7a259e3
Show file tree
Hide file tree
Showing 12 changed files with 200 additions and 53 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
data
hector-mc-cv
hector-mc-run
hector-stack
Expand Down
13 changes: 13 additions & 0 deletions array_vector.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package hector
import (
"strings"
"strconv"
"math"
)

type ArrayVector struct {
Expand Down Expand Up @@ -126,4 +127,16 @@ func (v *ArrayVector) Scale(s float64) {
for i, _ := range v.data {
v.data[i] *= s
}
}

// SoftMaxNorm returns a new vector holding the softmax of v's entries:
// ret[k] = exp(v[k]) / sum_j exp(v[j]). The receiver is left unchanged.
//
// Entries are shifted by their maximum before exponentiation. This leaves
// the result mathematically unchanged but prevents math.Exp from
// overflowing to +Inf for large inputs (which would yield NaN ratios).
func (v *ArrayVector) SoftMaxNorm() *ArrayVector {
	max := math.Inf(-1)
	for _, val := range v.data {
		if val > max {
			max = val
		}
	}
	sum := 0.0
	for _, val := range v.data {
		sum += math.Exp(val - max)
	}
	ret := NewArrayVector()
	for key, val := range v.data {
		ret.SetValue(key, math.Exp(val-max)/sum)
	}
	return ret
}
16 changes: 9 additions & 7 deletions bin/hector-cv.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,15 @@ func main(){
total := int(cv)

if profile != "" {
f, err := os.Create(profile)
if err != nil {
log.Fatal(err)
}
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}
fmt.Println(profile)
f, err := os.Create(profile)
if err != nil {
fmt.Println("%v", err)
log.Fatal(err)
}
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}

average_auc := 0.0
for part := 0; part < total; part++ {
Expand Down
20 changes: 12 additions & 8 deletions bin/hector-mc-cv.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import(
"strconv"
"fmt"
"runtime/pprof"
"runtime"
"os"
"log"
)
Expand All @@ -30,18 +31,18 @@ func main(){
profile, _ := params["profile"]
dataset := hector.NewDataSet()
dataset.Load(train_path, global)

cv, _ := strconv.ParseInt(params["cv"], 10, 32)
total := int(cv)

if profile != "" {
f, err := os.Create(profile)
if err != nil {
log.Fatal(err)
}
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}
f, err := os.Create(profile)
if err != nil {
log.Fatal(err)
}
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}

average_accuracy := 0.0
for part := 0; part < total; part++ {
Expand All @@ -52,6 +53,9 @@ func main(){
fmt.Println("accuracy : ", accuracy)
average_accuracy += accuracy
classifier = nil
train = nil
test = nil
runtime.GC()
}
fmt.Println(average_accuracy / float64(total))
}
14 changes: 14 additions & 0 deletions bin/hector-mc-run.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ package main
import(
"hector"
"fmt"
"log"
"runtime/pprof"
"os"
)

func main(){
Expand All @@ -12,6 +15,17 @@ func main(){

classifier := hector.GetMutliClassClassifier(method)

profile, _ := params["profile"]
if profile != "" {
fmt.Printf("Profile data => %s\n", profile)
f, err := os.Create(profile)
if err != nil {
log.Fatal(err)
}
pprof.StartCPUProfile(f)
defer pprof.StopCPUProfile()
}

if action == "" {
accuracy, _ := hector.MultiClassRun(classifier, train, test, pred, params)
fmt.Println("accuracy : ", accuracy)
Expand Down
10 changes: 5 additions & 5 deletions classifier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func TestClassifiers(t *testing.T){
params["min-leaf-size"] = "5"
params["tree-count"] = "10"
params["learning-rate"] = "0.05"
params["regularization"] = "0.01"
params["regularization"] = "0.0001"
params["e"] = "0.1"
params["c"] = "0.1"
params["gini"] = "1.0"
Expand All @@ -42,14 +42,14 @@ func TestClassifiersOnXOR(t *testing.T) {
algos := []string{"ann", "rf", "rdt", "knn"}

params := make(map[string]string)
params["steps"] = "10"
params["steps"] = "30"
params["max-depth"] = "10"
params["min-leaf-size"] = "10"
params["tree-count"] = "100"
params["learning-rate"] = "0.05"
params["regularization"] = "0.01"
params["learning-rate"] = "0.01"
params["regularization"] = "0.0001"
params["gini"] = "1.0"
params["hidden"] = "10"
params["hidden"] = "15"
params["k"] = "10"
params["feature-count"] = "1.0"
params["dt-sample-ratio"] = "1.0"
Expand Down
8 changes: 4 additions & 4 deletions matrix.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@ func (m *Matrix) AddValue(k1, k2 int64, v float64){
}

// SetValue stores v at position (k1, k2), lazily creating the row
// vector for k1 the first time that row is touched.
func (m *Matrix) SetValue(k1, k2 int64, v float64) {
	row, present := m.data[k1]
	if !present {
		// First write to this row: allocate it and remember it.
		row = NewVector()
		m.data[k1] = row
	}
	row.SetValue(k2, v)
}

func (m *Matrix) GetValue(k1, k2 int64) float64 {
Expand All @@ -42,7 +43,6 @@ func (m *Matrix) GetRow(k1 int64) *Vector {
} else {
return row
}
return row
}

func (m *Matrix) Scale(scale float64) *Matrix {
Expand Down
130 changes: 105 additions & 25 deletions neural_network.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,31 @@ import(
"strconv"
"math/rand"
"math"
"fmt"
)

type NeuralNetworkParams struct {
LearningRate float64
LearningRateDiscount float64
Regularization float64
Hidden int64
Steps int
Verbose int
}

type TwoLayerWeights struct {
L1 *Matrix
L2 *Matrix
}


/*
Please refer to this chapter to know algorithm details :
http://www4.rgu.ac.uk/files/chapter3%20-%20bp.pdf
*/
type NeuralNetwork struct {
Model TwoLayerWeights
MaxLabel int64
Params NeuralNetworkParams
}

Expand All @@ -41,16 +51,22 @@ func (self *NeuralNetwork) LoadModel(path string){

func (algo *NeuralNetwork) Init(params map[string]string) {
algo.Params.LearningRate, _ = strconv.ParseFloat(params["learning-rate"], 64)
algo.Params.LearningRateDiscount, _ = strconv.ParseFloat(params["learning-rate-discount"], 64)
algo.Params.Regularization, _ = strconv.ParseFloat(params["regularization"], 64)
steps, _ := strconv.ParseInt(params["steps"], 10, 32)
hidden, _ := strconv.ParseInt(params["hidden"], 10, 64)
verbose, _ := strconv.ParseInt(params["verbose"], 10, 32)

algo.Params.Steps = int(steps)
algo.Params.Hidden = int64(hidden)
algo.Params.Verbose = int(verbose)
}

func (algo *NeuralNetwork) Train(dataset * DataSet) {
algo.Model = TwoLayerWeights{}

algo.Model.L1 = NewMatrix()
algo.Model.L2 = NewMatrix()

for i := int64(0); i < algo.Params.Hidden; i++ {
algo.Model.L1.data[i] = NewVector()
}
Expand All @@ -71,53 +87,117 @@ func (algo *NeuralNetwork) Train(dataset * DataSet) {
}
}
}
algo.MaxLabel = int64(max_label)

algo.Model.L2 = NewMatrix()
for i := int64(0); i < algo.Params.Hidden; i++ {
for j := 0; j <= max_label; j++ {
algo.Model.L2.SetValue(i, j, (rand.Float64() - 0.5) / math.Sqrt(float64(max_label) + 1.0))
for i := int64(0); i <= algo.Params.Hidden; i++ {
for j := int64(0); j < algo.MaxLabel; j++ {
algo.Model.L2.SetValue(i, j, (rand.NormFloat64() / math.Sqrt(float64(algo.MaxLabel) + 1.0)))
}
}

for step := 0; step < algo.Params.Steps; step++{
if algo.Params.Verbose <= 0 {
fmt.Printf(".")
}
total := len(dataset.Samples)
counter := 0
for _, sample := range dataset.Samples {
y := NewVector()
z := NewVector()
e := NewVector()
delta_hidden := NewVector()

for i := int64(0); i < algo.Params.Hidden; i++ {
sum := float64(0)
wi := algo.Model.L1.data[i]
for _, f := range sample.Features {
sum += f.Value * algo.Model.L1.data[i].GetValue(f.Id)
sum += f.Value * wi.GetValue(f.Id)
}
y.data[i] = Sigmoid(sum)
for j := 0; j <= max_label; j++ {
z.AddValue(j, y.GetValue(i) * algo.Model.L2.GetValue(i, j))
}
y.data[algo.Params.Hidden] = 1.0
for i := int64(0); i < algo.MaxLabel; i++ {
sum := float64(0)
for j := int64(0); j <= algo.Params.Hidden; j++ {
sum += y.GetValue(j)*algo.Model.L2.GetValue(j, i)
}
z.SetValue(i, sum)
}
z = z.SoftMaxNorm()
e.SetValue(int64(sample.Label), 1.0)
e.AddVector(z, -1.0)

err = NewVector()
err.AddValue(sample.Label, 1.0)
err.AddVector(z, -1.0)
sig := NewVector()
for key, val := range y.data {
sig.SetValue(key, err * algo.Model.L2.GetValue(key) * (1-val) * val)
}
for key, val := range algo.Model.L2.data {
algo.Model.L2.SetValue(key, val + algo.Params.LearningRate * y.GetValue(key) * err)
for i := int64(0); i <= algo.Params.Hidden; i++ {
delta := float64(0)
for j := int64(0); j < algo.MaxLabel; j++ {
wij := algo.Model.L2.GetValue(i, j)
sig_ij := e.GetValue(j) * (1-z.GetValue(j)) * z.GetValue(j)
delta += sig_ij * wij
wij += algo.Params.LearningRate * (y.GetValue(i) * sig_ij - algo.Params.Regularization * wij)
algo.Model.L2.SetValue(i, j, wij)
}
delta_hidden.SetValue(i, delta)
}
for i, s := range sig.data {
if s != 0 {
for _, f := range sample.Features {
val := algo.Model.L1.data[i].GetValue(f.Id)
algo.Model.L1.SetValue(i, f.Id, val + algo.Params.LearningRate * s * f.Value)
}

for i := int64(0); i < algo.Params.Hidden; i++ {
wi := algo.Model.L1.data[i]
for _, f := range sample.Features {
wji := wi.GetValue(f.Id)
wji += algo.Params.LearningRate * (delta_hidden.GetValue(i) * f.Value * y.GetValue(i) * (1-y.GetValue(i)) - algo.Params.Regularization * wji)
wi.SetValue(f.Id, wji)
}
}
counter++
if algo.Params.Verbose > 0 && counter % 2000 == 0 {
fmt.Printf("Epoch %d %f%%\n", step+1, float64(counter)/float64(total)*100)
}
}

if algo.Params.Verbose > 0 {
algo.Evaluate(dataset)
}
algo.Params.LearningRate *= algo.Params.LearningRateDiscount
}
fmt.Println()
}

func (algo *NeuralNetwork) Predict(sample * Sample) float64 {
return Sigmoid(((algo.Model.L1.MultiplyVector(sample.GetFeatureVector())).ApplyOnElem(Sigmoid)).Dot(algo.Model.L2))
// PredictMultiClass runs a forward pass of the two-layer network on the
// given sample and returns the per-class probability distribution
// (softmax over the output-layer scores).
func (algo *NeuralNetwork) PredictMultiClass(sample * Sample) * ArrayVector {
	hidden := NewVector()
	// Hidden layer: sigmoid of each unit's weighted feature sum.
	for h := int64(0); h < algo.Params.Hidden; h++ {
		weights := algo.Model.L1.data[h]
		activation := float64(0)
		for _, f := range sample.Features {
			activation += f.Value * weights.GetValue(f.Id)
		}
		hidden.data[h] = Sigmoid(activation)
	}
	// Constant extra unit serving as the output layer's bias input.
	hidden.data[algo.Params.Hidden] = 1
	// Output layer: linear score per label, then softmax-normalize.
	scores := NewArrayVector()
	for label := int64(0); label < algo.MaxLabel; label++ {
		score := float64(0)
		for h := int64(0); h <= algo.Params.Hidden; h++ {
			score += hidden.GetValue(h) * algo.Model.L2.GetValue(h, label)
		}
		scores.SetValue(int(label), score)
	}
	return scores.SoftMaxNorm()
}

// Predict adapts the multi-class forward pass to the binary Classifier
// interface by returning the predicted probability of class 1.
func (algo *NeuralNetwork) Predict(sample *Sample) float64 {
	return algo.PredictMultiClass(sample).GetValue(1)
}

// Evaluate prints the model's classification accuracy on dataset.
// A sample counts as correct when the argmax of the predicted class
// distribution equals its integer label.
func (algo *NeuralNetwork) Evaluate(dataset *DataSet) {
	correct := 0
	total := 0
	for _, sample := range dataset.Samples {
		prediction := algo.PredictMultiClass(sample)
		label, _ := prediction.KeyWithMaxValue()
		if int(label) == sample.Label {
			correct++
		}
		total++
	}
	if total == 0 {
		// Guard the 0/0 division: the original printed "accuracy NaN%"
		// for an empty dataset.
		fmt.Printf("accuracy 0.000000%% (no samples)\n")
		return
	}
	fmt.Printf("accuracy %f%%\n", float64(correct)/float64(total)*100)
}
Loading

0 comments on commit 7a259e3

Please sign in to comment.