Commit cf6c1f9: update

szcf-weiya committed Jan 3, 2018
1 parent a223a46

Showing 4 changed files with 51 additions and 40 deletions.
21 changes: 12 additions & 9 deletions code/boosting/Adaboost.R
@@ -37,17 +37,20 @@ AdaBoost <- function(x, y, m = 10)
 {
     for (k in 1:N)
     {
-        err = 0
+        #err = 0
         flag.tmp = 1
         cutpoint.val.tmp = x[k, j]
-        for (kk in 1:N)
-        {
-            pred = 1
-            xx = x[kk, j]
-            if (xx < cutpoint.val.tmp)
-                pred = -1
-            err = err + w[kk] * as.numeric(y[kk] != pred)
-        }
+        # for (kk in 1:N)
+        # {
+        #     pred = 1
+        #     xx = x[kk, j]
+        #     if (xx < cutpoint.val.tmp)
+        #         pred = -1
+        #     err = err + w[kk] * as.numeric(y[kk] != pred)
+        # }
+        xj = x[, j]
+        pred = sapply(xj, function(x) ifelse(x < cutpoint.val.tmp, -1, 1))
+        err = sum(w * as.numeric(y != pred))
     }
     if (err > 0.5)
     {
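The change above replaces the inner loop over observations with a vectorized computation of the weighted error of a stump. For comparison with adaboost.jl below, the same idea as a one-line Julia helper (hypothetical, not part of the repo; w holds the observation weights, y the ±1 labels, xj the j-th feature column):

    ## weighted error of the stump "predict -1 below cut, +1 otherwise"
    stump_err(w, y, xj, cut) = sum(w .* (y .!= map(v -> v < cut ? -1 : 1, xj)))

A stump is then selected by minimizing this error over features j and candidate cutpoints, exactly as the surrounding loops in Adaboost.R do.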
28 changes: 18 additions & 10 deletions code/boosting/adaboost.jl
@@ -10,16 +10,22 @@ end

 function train!(model::Adaboost, X::Matrix, y::Vector)
     n_sample, n_feature = size(X)
+    ## initialize the weights
     w = ones(n_sample) / n_sample
     threshold = 0
-    polarity = 0
+    ## flag indicates the classification direction:
+    ## for an observation obs above the cutpoint,
+    ## flag = 1 classifies obs as 1,
+    ## flag = -1 classifies obs as -1
+    flag = 0
     feature_index = 0
     alpha = 0
     for i = 1:model.n_clf
+        ## step 2(a): find the best stump
         err_max = 1e10
         for feature_ind = 1:n_feature
             for threshold_ind = 1:n_sample
-                polarity_ = 1
+                flag_ = 1
                 err = 0
                 threshold_ = X[threshold_ind, feature_ind]

@@ -34,28 +40,30 @@ function train!(model::Adaboost, X::Matrix, y::Vector)
                 err = err / sum(w)
                 if err > 0.5
                     err = 1 - err
-                    polarity_ = -1
+                    flag_ = -1
                 end

                 if err < err_max
                     err_max = err
                     threshold = threshold_
-                    polarity = polarity_
+                    flag = flag_
                     feature_index = feature_ind
                 end
             end
         end
+        ## step 2(c)
+        #alpha = 1/2 * log((1-err_max)/(err_max))
         alpha = 1/2 * log((1.000001-err_max)/(err_max+0.000001))

+        ## step 2(d)
         for j = 1:n_sample
             pred = 1
             x = X[j, feature_index]
-            if polarity * x < polarity * threshold
+            if flag * x < flag * threshold
                 pred = -1
             end
             w[j] = w[j] * exp(-alpha * y[j] * pred)
         end
-        model.clf[i, :] = [feature_index, threshold, polarity, alpha]
+        model.clf[i, :] = [feature_index, threshold, flag, alpha]
     end
 end
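Two details of steps 2(c) and 2(d) above are worth spelling out. The 1e-6 offsets in alpha only guard against a stump that is perfectly right (err_max = 0) or perfectly wrong (err_max = 1), where the commented-out formula would produce ±Inf. And because the code uses the 1/2 factor together with labels and predictions in {-1, +1}, its update w[j] * exp(-alpha * y[j] * pred) agrees with Algorithm 10.1's w_i * exp(alpha_m * I(y_i != G_m(x_i))) (with alpha_m = 2 * alpha) up to a constant factor that the err / sum(w) normalization in step 2(b) absorbs. A minimal sketch of the reweighting pass, under those assumptions:

    ## hypothetical helper, not part of adaboost.jl: one reweighting pass;
    ## correct samples (y .* pred .== 1) are scaled by exp(-alpha),
    ## misclassified ones by exp(alpha), so only the ratio exp(2*alpha)
    ## matters once the weights are renormalized
    update_weights(w, y, pred, alpha) = w .* exp.(-alpha .* y .* pred)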

@@ -76,10 +84,10 @@ function predict(model::Adaboost,
         pred = 1
         feature_index = trunc(Int64,model.clf[i, 1])
         threshold = model.clf[i, 2]
-        polarity = model.clf[i, 3]
+        flag = model.clf[i, 3]
         alpha = model.clf[i, 4]
         x_temp = x[feature_index]
-        if polarity * x_temp < polarity * threshold
+        if flag * x_temp < flag * threshold
             pred = -1
         end
         s += alpha * pred
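Read as a pure function, the decision rule that flag and threshold encode here is (a sketch of the same comparison used in predict above, not a repo function):

    ## flag = 1: predict -1 below the threshold, +1 otherwise;
    ## flag = -1: the direction is reversed
    stump_pred(x, threshold, flag) = flag * x < flag * threshold ? -1 : 1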
@@ -126,7 +134,7 @@ function test_Adaboost()
         model = Adaboost(n_clf=m[i])
         train!(model, x_train, y_train)
         predictions = predict(model, x_test)
-        println("The number of week classifiers ", m[i])
+        println("The number of weak classifiers ", m[i])
         res[i] = classification_error(y_test, predictions)
         println("classification error: ", res[i])
     end
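test_Adaboost() relies on the repo's data loading and its classification_error helper. For a self-contained smoke test, here is a toy run assuming only the Adaboost, train!, and predict definitions above (Julia 0.6-era syntax to match this file; on Julia 1.0+ use Random.seed! instead of srand and add `using Statistics` for mean):

    ## toy data: two Gaussian blobs labelled +1 and -1
    srand(1)
    X = vcat(randn(50, 2) .+ 1.0, randn(50, 2) .- 1.0)
    y = vcat(ones(Int, 50), -ones(Int, 50))
    model = Adaboost(n_clf = 20)
    train!(model, X, y)
    yhat = predict(model, X)
    println("training error: ", mean(yhat .!= y))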
40 changes: 20 additions & 20 deletions code/boosting/res.txt
@@ -1,20 +1,20 @@
-1.000000,0.455800
-21.000000,0.339500
-41.000000,0.281200
-61.000000,0.240300
-81.000000,0.213000
-101.000000,0.199100
-121.000000,0.187000
-141.000000,0.180300
-161.000000,0.170400
-181.000000,0.168400
-201.000000,0.159200
-221.000000,0.154800
-241.000000,0.149000
-261.000000,0.142600
-281.000000,0.138500
-301.000000,0.133200
-321.000000,0.130400
-341.000000,0.129300
-361.000000,0.123800
-381.000000,0.124000
+1.000000,0.454800
+21.000000,0.338500
+41.000000,0.252400
+61.000000,0.230000
+81.000000,0.213600
+101.000000,0.202600
+121.000000,0.189100
+141.000000,0.176400
+161.000000,0.166100
+181.000000,0.162800
+201.000000,0.155500
+221.000000,0.151700
+241.000000,0.144800
+261.000000,0.138300
+281.000000,0.137100
+301.000000,0.135900
+321.000000,0.133400
+341.000000,0.129500
+361.000000,0.125000
+381.000000,0.126100
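Each row appears to pair the number of weak classifiers with the test classification error printed by test_Adaboost() above, both written as floats. A one-line read-back sketch (readdlm is in Base on Julia 0.6; from 0.7 on it lives in DelimitedFiles):

    res = readdlm("code/boosting/res.txt", ',')  # column 1: n_clf, column 2: test error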
@@ -32,7 +32,7 @@ $$

> Figure 10.1. Schematic of AdaBoost. Classifiers are trained on weighted versions of the dataset, and then combined to produce the final prediction.
-The data modification at each boosting step consists of assigning weights $w_1,w_2,\ldots,w_N$ to each of the training observations $(x_i,y_i),i=1,2,\ldots,N$. Initially all of the weights are set to $w_i=1/N$, so that the first step trains the classifier on the data in the usual manner. For each successive iteration $m=2,3,\ldots,M$ the weights of the observations are modified individually, and then the classification algorithm is reapplied to the weighted observations. At step $m$, the observations misclassified by the classifier $G_{m-1}(x)$ of the previous step have their weights increased, whereas the weights are decreased for those classified correctly. Thus as the iterations proceed, observations that are hard to classify correctly receive ever-increasing influence. Each successive classifier is thereby forced to concentrate on the training observations misclassified in the previous step.
+When the data are modified at each boosting step, weights $w_1,w_2,\ldots,w_N$ are assigned to each of the training observations $(x_i,y_i),i=1,2,\ldots,N$. Initially all of the weights are set to $w_i=1/N$, so that the first step trains the classifier on the data in the usual manner. For each subsequent iteration $m=2,3,\ldots,M$ the observation weights are modified individually, and the classification algorithm is then reapplied to the weighted observations. At step $m$, the observations misclassified by the classifier $G_{m-1}(x)$ of the previous step have their weights increased, whereas the weights are decreased for those classified correctly. Thus as the iterations proceed, observations that are hard to classify correctly receive ever-increasing influence. Each successive classifier is thereby forced to concentrate on the training observations misclassified in the previous step.

Algorithm 10.1 shows the details of the AdaBoost.M1 algorithm. The current classifier $G_m(x)$ is induced from the weighted observations in line 2(a). The resulting weighted error rate is computed in line 2(b). Line 2(c) calculates the weight $\alpha_m$ given to $G_m(x)$ in producing the final classifier $G(x)$ (line 3). The individual weight of each observation is updated in line 2(d). Observations misclassified by $G_m(x)$ have their weights scaled by a factor $\exp(\alpha_m)$, increasing their relative influence when inducing the next classifier $G_{m+1}(x)$ in the sequence.
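For reference, the quantities these lines refer to are, in the notation of Algorithm 10.1,

$$
\mathrm{err}_m=\frac{\sum_{i=1}^Nw_iI(y_i\neq G_m(x_i))}{\sum_{i=1}^Nw_i},\qquad
\alpha_m=\log\frac{1-\mathrm{err}_m}{\mathrm{err}_m},
$$

$$
w_i\leftarrow w_i\cdot \exp(\alpha_m\cdot I(y_i\neq G_m(x_i))),\qquad
G(x)=\mathrm{sign}\Big(\sum_{m=1}^M\alpha_m G_m(x)\Big).
$$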
