
Commit
resolve szcf-weiya#190
szcf-weiya committed Apr 19, 2019
1 parent 6bd26ad · commit 2e8d4f0
Showing 6 changed files with 780 additions and 1 deletion.
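
The main addition is imgs/fig.16.8/src.py, which reproduces the Figure 16.8 comparison: gradient boosting (with and without subsampling) and a random forest baseline against their ISLE post-processed counterparts, with the measured test errors saved to res.txt and plotted in res.png. An earlier version of the script is kept as src_20190419.py.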
3 changes: 2 additions & 1 deletion .gitattributes
@@ -2,4 +2,5 @@
 *.html linguist-vendored
 *.js linguist-vendored
 docs/notes/* linguist-documentation=false
-code/rbm/* linguist-vendored
+code/rbm/* linguist-vendored
+toy.ipynb linguist-vendored
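
(The linguist-vendored attribute excludes the matched paths from GitHub's language statistics; linguist-documentation=false keeps docs/notes/* counted as code.)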
Binary file added imgs/fig.16.8/res.png
4 changes: 4 additions & 0 deletions imgs/fig.16.8/res.txt
@@ -0,0 +1,4 @@
3.471089278426772395e+00 3.694484974913252895e+00 3.221193494798355594e+00 2.629942061466373548e+00 2.981713992313058448e+00 3.227424906838612717e+00 3.161064671670890558e+00 3.265299525023363625e+00 3.041491391170615177e+00 2.840795689491775633e+00
3.913158516016176591e+00 3.271695872319472009e+00 2.835988244334160413e+00 3.088715779718863796e+00 2.704388051963395601e+00 2.918292647937700934e+00 2.806815158674249133e+00 2.500583084987830329e+00 2.690481447579023744e+00 2.466965132557395801e+00
5.262305622808069572e+00 5.487721583905344147e+00 5.512739370523116911e+00 4.408297653087700496e+00 3.437629788385348384e+00 2.927474280269209306e+00 3.134533486759246479e+00 2.880237251101936735e+00 2.926797973359059224e+00 2.857133290885807586e+00
5.002678448431630009e+00 5.292396072786397454e+00 4.536936805041232645e+00 4.860229915319444594e+00 4.415930135886694252e+00 3.926863709391194668e+00 3.698655840904252923e+00 3.864343900512865471e+00 3.700162351995403665e+00 3.387540785921615782e+00
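
For reference, each row of res.txt is one curve from src.py below, in the order of the script's labels list: GBM with subsample 1, GBM with subsample 0.1, ISLE GB, ISLE RF. A minimal sketch to reload it:

    import numpy as np
    res = np.loadtxt("res.txt")  # shape (4, 10): one row per method, one column per grid point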
93 changes: 93 additions & 0 deletions imgs/fig.16.8/src.py
@@ -0,0 +1,93 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso
import numpy as np
from matplotlib import pyplot as plt


def genXY(N, p):
    X = np.random.rand(N*p).reshape(N, p)
    # response: nonlinear signal in the first 5 features, linear in the next 30, plus Gaussian noise with sd 1.3
    y = 10 * np.exp(-2*np.sum(X[:,0:5]**2, axis = 1)) + np.sum(X[:,5:35], axis = 1) + np.random.normal(0, 1.3, N)
    return X, y

def mse(regr):
    # test-set MSE on a fresh sample from the same data-generating process
    X_test, y_test = genXY(500, 100)
    y_pred = regr.predict(X_test)
    return np.mean((y_pred - y_test)**2)

def ISLE(X, y, eta = 0.5, nu = 0.1, M = 10, model = "GB", lam = 0.1):
    N = len(X)
    # induce M basis trees via gradient boosting or a random forest
    if model == "GB":
        regr = GradientBoostingRegressor(n_estimators=M, learning_rate=0.01)
        regr.fit(X, y)
        bs = regr.estimators_[:, 0]  # estimators_ is an ndarray of shape (M, 1)
    else:
        regr = RandomForestRegressor(n_estimators=M)
        regr.fit(X, y)
        bs = regr.estimators_  # list of M trees

    # refit each tree on a random subsample (fraction eta) of the shrunken (nu) residuals
    f_hat = np.repeat(np.mean(y), N)  # initialize f0 as the mean response
    for m in range(M):
        Sm_idx = np.random.choice(N, int(eta*N), replace = False)
        bs[m].fit(X[Sm_idx], y[Sm_idx] - f_hat[Sm_idx])
        f_hat = f_hat + nu * bs[m].predict(X)

    # post-process: the lasso on the tree predictions selects and reweights the trees
    lasso = Lasso(alpha = lam)
    Tx = np.empty([N, M])
    for i in range(M):
        Tx[:, i] = bs[i].predict(X)
    lasso.fit(Tx, y)
    nT = sum(lasso.coef_ != 0)  # number of trees kept by the lasso
    return bs, lasso, nT
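
A minimal usage sketch of ISLE as defined above (the values of M and lam here are illustrative only, not taken from the runs below):

    X, y = genXY(1000, 100)
    bs, lasso, nT = ISLE(X, y, model = "GB", M = 100, lam = 0.01)
    print("trees kept by the lasso:", nT)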

def mse_isle(bs, lasso):
    M = len(bs)
    X_test, y_test = genXY(500, 100)
    Tx = np.empty([len(X_test), M])
    for i in range(M):
        Tx[:, i] = bs[i].predict(X_test)
    y_pred = lasso.predict(Tx)
    return np.mean((y_pred - y_test)**2)

if __name__ == '__main__':
    N = 1000
    p = 100
    X, y = genXY(N, p)
    # random forest baseline
    rf = RandomForestRegressor(n_estimators=2500)
    rf.fit(X, y)
    rf_mse = mse(rf)
    res = np.empty([4, 10])
    nT = np.empty([4, 10])
    # GBM curves: test error vs. ensemble size, with and without subsampling
    for i in range(10):
        gbm1 = GradientBoostingRegressor(n_estimators=250*(1+i), learning_rate=0.01, subsample=1)
        gbm1.fit(X, y)
        gbm2 = GradientBoostingRegressor(n_estimators=250*(1+i), learning_rate=0.01, subsample=0.1)
        gbm2.fit(X, y)
        res[0, i] = mse(gbm1)
        res[1, i] = mse(gbm2)
        nT[0, i] = 250 * (1 + i)
        nT[1, i] = 250 * (1 + i)
    # ISLE curves: sweep the lasso penalty; the x-coordinate is the number of trees kept
    lams = [1, 0.1, 0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001, 0.000001]
    for i in range(10):
        bs1, lasso1, nT1 = ISLE(X, y, model = "GB", M = 2500, lam = lams[i])
        bs2, lasso2, nT2 = ISLE(X, y, model = "RF", M = 2500, lam = lams[i])
        nT[2, i] = nT1
        nT[3, i] = nT2
        res[2, i] = mse_isle(bs1, lasso1)
        res[3, i] = mse_isle(bs2, lasso2)

    np.savetxt("res.txt", res)
    plt.figure(figsize=(16, 14))
    labels = ['GBM (1, 0.01)', 'GBM (0.1, 0.01)', 'ISLE GB', 'ISLE RF']
    for x_arr, y_arr, label in zip(nT, res, labels):
        plt.plot(x_arr, y_arr, label = label)
    plt.axhline(y=rf_mse, linestyle = '--', label = "Random Forest")
    plt.xlabel("Number of Trees")
    plt.ylabel("Mean Squared Error")
    plt.legend()
    plt.savefig('res.png')
    plt.show()
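
Note that the two GBM curves share a fixed x-grid (250, 500, ..., 2500 trees), while each ISLE point's x-coordinate is nT, the number of trees the lasso keeps at that penalty, so those curves fall wherever the sweep over lams places them.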
76 changes: 76 additions & 0 deletions imgs/fig.16.8/src_20190419.py
@@ -0,0 +1,76 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso
import numpy as np
from matplotlib import pyplot as plt


def genXY(N, p):
    X = np.random.rand(N*p).reshape(N, p)
    y = 10 * np.exp(-2*np.sum(X[:,0:5]**2, axis = 1)) + np.sum(X[:,5:35], axis = 1) + np.random.normal(0, 1.3, N)
    return X, y

def mse(regr):
    X_test, y_test = genXY(500, 100)
    y_pred = regr.predict(X_test)
    return np.mean((y_pred - y_test)**2)

def ISLE(X, y, eta = 0.5, nu = 0.1, M = 10, model = "GB", n_est = 500):
    N = len(X)
    # step 1: sequentially fit base learners on subsampled, shrunken residuals
    f_hat = np.repeat(np.mean(y), N)
    bs = [None] * M
    for m in range(M):
        Sm_idx = np.random.choice(N, int(eta*N), replace = False)
        if model == "GB":
            bs[m] = GradientBoostingRegressor(n_estimators=n_est, learning_rate=0.01)
        else:
            bs[m] = RandomForestRegressor(n_estimators=n_est)
        bs[m].fit(X[Sm_idx], y[Sm_idx] - f_hat[Sm_idx])
        f_hat = f_hat + nu * bs[m].predict(X)
    # step 2: post-process the ensemble with the lasso (penalty fixed at 0.1)
    lasso = Lasso(alpha = 0.1)
    Tx = np.empty([N, M])
    for i in range(M):
        Tx[:,i] = bs[i].predict(X)
    lasso.fit(Tx, y)
    return bs, lasso

def mse_isle(bs, lasso):
    M = len(bs)
    X_test, y_test = genXY(500, 100)
    Tx = np.empty([len(X_test), M])
    for i in range(M):
        Tx[:, i] = bs[i].predict(X_test)
    y_pred = lasso.predict(Tx)
    return np.mean((y_pred - y_test)**2)

if __name__ == '__main__':
    N = 1000
    p = 100
    X, y = genXY(N, p)
    # random forest baseline
    rf = RandomForestRegressor(n_estimators=2500)
    rf.fit(X, y)
    rf_mse = mse(rf)
    res = np.empty([4, 5])
    for i in range(5):
        gbm1 = GradientBoostingRegressor(n_estimators=500*(1+i), learning_rate=0.01, subsample=1)
        gbm1.fit(X, y)
        gbm2 = GradientBoostingRegressor(n_estimators=500*(1+i), learning_rate=0.01, subsample=0.1)
        gbm2.fit(X, y)
        bs1, lasso1 = ISLE(X, y, model = "GB", n_est = 500*(1+i))
        bs2, lasso2 = ISLE(X, y, model = "RF", n_est = 500*(1+i))
        res[0, i] = mse(gbm1)
        res[1, i] = mse(gbm2)
        res[2, i] = mse_isle(bs1, lasso1)
        res[3, i] = mse_isle(bs2, lasso2)
    np.savetxt("res.txt", res)
    labels = ['GBM (1, 0.01)', 'GBM (0.1, 0.01)', 'ISLE GB', 'ISLE RF']
    for y_curve, label in zip(res, labels):  # y_curve avoids shadowing the training response y
        plt.plot(500*(np.arange(5) + 1), y_curve, label = label)
    plt.axhline(y=rf_mse, linestyle = '--', label = "Random Forest")
    plt.xlabel("Number of Trees")
    plt.ylabel("Mean Squared Error")
    plt.legend()
    plt.show()
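
Compared with src.py above, this earlier version grows a fresh ensemble of n_est trees at every grid point and keeps the lasso penalty fixed at 0.1, rather than post-processing a single pool of 2500 trees over a sweep of penalties; it also plots error against ensemble size instead of the number of trees selected by the lasso.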
