- Added support vector regression
- Added test cases
rpmcruz committed Jul 30, 2017
1 parent 4292ba6 commit 3bbb104
Showing 4 changed files with 105 additions and 1 deletion.
2 changes: 1 addition & 1 deletion svm/julia/svm.jl
@@ -44,7 +44,7 @@ function fit(self::SVM, X::Array{Float64,2}, y::Array{Int64})
    # fix support vectors
    for i in 1:nobs
        if y[i]*(sum(self.weights .* X[i,:]) + self.bias) < 1
-           dw .+= (1/nobs) * (-y[i]*X[i,:])
+           dw += (1/nobs) * (-y[i]*X[i,:])
            db += (1/nobs) * (-y[i])
        end
    end
File renamed without changes.
73 changes: 73 additions & 0 deletions svm/julia/svr.jl
@@ -0,0 +1,73 @@
# implementation based on Pegasos (Shalev-Shwartz et al., 2007)
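#
# for reference, a sketch of the objective assumed here: the standard
# epsilon-insensitive loss plus an L2 penalty, which is what the
# (sub)gradients computed in fit below correspond to:
#
#   min_{w,b}  lambda*||w||^2 + (1/n) * sum_i max(0, |y_i - (w.x_i + b)| - epsilon)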

type SVR
    lambda  ::Float64

    # internal
    bias    ::Float64
    weights ::Array{Float64}

    SVR(lambda) =
        new(lambda, 0, [])
end

function fit(self::SVR, X::Array{Float64,2}, y::Array{Float64})
    nobs = length(y)
    @assert size(X, 1) == nobs

    epsilon = 0.1
    maxiter = 100
    self.weights = zeros(size(X, 2))
    self.bias = 0

    # as in resilient backpropagation, we keep a separate learning rate for
    # each weight and adjust it depending on whether the gradient sign
    # changes, i.e. whether we stepped over the minimum (Riedmiller, 1994);
    # the strategy in Pegasos proper is to use eta = 1/it
    eta0 = 0.01
    old_dw = zeros(size(self.weights))
    old_db = 0
    b_eta = eta0
    w_etas = eta0 * ones(size(self.weights))
    detas = [0.5, 1, 1.2]
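    # the update below indexes detas by gt_0 + ge_0 + 1: a sign flip
    # (product < 0) selects detas[1] = 0.5 (shrink), a zero gradient selects
    # detas[2] = 1 (keep), and a stable sign selects detas[3] = 1.2 (grow)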

    for it in 1:maxiter
        dw = zeros(size(self.weights))
        db = 0

        # fix support vectors
        for i in 1:nobs
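            # subgradient of the epsilon-insensitive loss: points inside the
            # epsilon tube contribute nothing; outside it, s = sign(epsilon-dist)
            # equals sign(prediction - y), so d/dw = s*x and d/db = s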
            dist = y[i] - (sum(self.weights .* X[i,:]) + self.bias)
            if abs(dist) > epsilon
                s = sign(epsilon-dist)
                dw += (1/nobs) * X[i,:] * s
                db += (1/nobs) * s
            end
        end

        # penalty cost (lambda): gradient of lambda*||w||^2
        dw += 2*self.lambda*self.weights

        # update learning rate
        gt_0 = old_dw .* dw .> 0
        ge_0 = old_dw .* dw .>= 0
        w_etas .*= detas[gt_0 + ge_0 + 1]

        gt_0 = old_db * db > 0
        ge_0 = old_db * db >= 0
        b_eta *= detas[gt_0 + ge_0 + 1]

        # update values
        #eta = 1/(it+2)
        self.weights -= w_etas .* dw
        self.bias -= b_eta * db
        old_dw = dw
        old_db = db
    end
    self
end

function predict(self::SVR, X::Array{Float64,2})
    # w.x + b for each row (bias added once, outside the sum)
    sum(X .* self.weights', 2) + self.bias
end
31 changes: 31 additions & 0 deletions svm/julia/svr_test.jl
@@ -0,0 +1,31 @@
using MLBase
using RDatasets
include("svr.jl")

df = dataset("datasets", "airquality")
df = df[completecases(df),:]
X = Array{Float64,2}(df[:, 2:6])
y = Array{Float64}(df[:, 1])

function rmse(y, yp)
    sqrt(mean((y-yp).^2))
end

for ix in Kfold(length(y), 3)
    Xtr = X[ix,:]
    ytr = y[ix]
    not_ix = setdiff(1:length(y), ix)
    Xts = X[not_ix,:]
    yts = y[not_ix]

    # our own OLS implementation for comparison purposes
    ols = hcat(ones(size(Xtr,1)), Xtr) \ ytr
    yp = hcat(ones(size(Xts,1)), Xts) * ols
    @printf("OLS: RMSE: %.3f\n", rmse(yts, yp))

    svr = SVR(0)
    fit(svr, Xtr, ytr)
    yp = predict(svr, Xts)
    @printf("SVR: RMSE: %.3f\n", rmse(yts, yp))
    println()
end
