forked from szcf-weiya/ESL-CN
commit 2e8d4f0 (1 parent: 6bd26ad)
Showing 6 changed files with 780 additions and 1 deletion.
@@ -0,0 +1,4 @@
3.471089278426772395e+00 3.694484974913252895e+00 3.221193494798355594e+00 2.629942061466373548e+00 2.981713992313058448e+00 3.227424906838612717e+00 3.161064671670890558e+00 3.265299525023363625e+00 3.041491391170615177e+00 2.840795689491775633e+00
3.913158516016176591e+00 3.271695872319472009e+00 2.835988244334160413e+00 3.088715779718863796e+00 2.704388051963395601e+00 2.918292647937700934e+00 2.806815158674249133e+00 2.500583084987830329e+00 2.690481447579023744e+00 2.466965132557395801e+00
5.262305622808069572e+00 5.487721583905344147e+00 5.512739370523116911e+00 4.408297653087700496e+00 3.437629788385348384e+00 2.927474280269209306e+00 3.134533486759246479e+00 2.880237251101936735e+00 2.926797973359059224e+00 2.857133290885807586e+00
5.002678448431630009e+00 5.292396072786397454e+00 4.536936805041232645e+00 4.860229915319444594e+00 4.415930135886694252e+00 3.926863709391194668e+00 3.698655840904252923e+00 3.864343900512865471e+00 3.700162351995403665e+00 3.387540785921615782e+00
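The four rows above appear to be the res matrix written by np.savetxt("res.txt", res) in the script below, with one row per method (GBM (1, 0.01), GBM (0.1, 0.01), ISLE GB, ISLE RF) and one column per grid point; this mapping is an assumption, since the diff does not show the filename. A minimal sketch for reloading it:

import numpy as np

res = np.loadtxt("res.txt")   # assumed filename; shape (4, 10), one row per method
print(res.shape)
print(res.min(axis=1))        # best test MSE reached by each method over the grid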
@@ -0,0 +1,93 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso
import numpy as np
from matplotlib import pyplot as plt


def genXY(N, p):
    # simulated regression data: nonlinear signal in the first 5 features,
    # linear signal in features 5..34, plus Gaussian noise
    X = np.random.rand(N*p).reshape(N, p)
    y = 10 * np.exp(-2*np.sum(X[:,0:5]**2, axis = 1)) + np.sum(X[:,5:35], axis = 1) + np.random.normal(0, 1.3, N)
    return X, y


def mse(regr):
    # test MSE of a fitted regressor on a fresh sample
    X_test, y_test = genXY(500, 100)
    y_pred = regr.predict(X_test)
    return np.mean((y_pred - y_test)**2)


def ISLE(X, y, eta = 0.5, nu = 0.1, M = 10, model = "GB", lam = 0.1):
    N = len(X)
    # induce basis functions (trees) via GB or RF
    if model == "GB":
        regr = GradientBoostingRegressor(n_estimators=M, learning_rate=0.01)
        regr.fit(X, y)
        bs = regr.estimators_[:, 0]  # estimators_ is an array of shape (M, 1)
    else:
        regr = RandomForestRegressor(n_estimators=M)
        regr.fit(X, y)
        bs = regr.estimators_  # list of M trees

    # initialize f0, then refit each tree on the current residuals
    # over a random subsample of size eta*N
    f_hat = np.repeat(np.mean(y), N)
    for m in range(M):
        Sm_idx = np.random.choice(N, int(eta*N), replace = False)
        bs[m].fit(X[Sm_idx], y[Sm_idx] - f_hat[Sm_idx])
        f_hat = f_hat + nu * bs[m].predict(X)

    # post-process: a lasso on the tree predictions selects a subset of trees
    lasso = Lasso(alpha = lam)
    Tx = np.empty([N, M])
    for i in range(M):
        Tx[:, i] = bs[i].predict(X)
    lasso.fit(Tx, y)
    nT = np.sum(lasso.coef_ != 0)  # number of trees with nonzero lasso coefficient
    return bs, lasso, nT


def mse_isle(bs, lasso):
    # test MSE of the lasso post-processed ensemble on a fresh sample
    M = len(bs)
    X_test, y_test = genXY(500, 100)
    Tx = np.empty([len(X_test), M])
    for i in range(M):
        Tx[:, i] = bs[i].predict(X_test)
    y_pred = lasso.predict(Tx)
    return np.mean((y_pred - y_test)**2)


if __name__ == '__main__':
    N = 1000
    p = 100
    X, y = genXY(N, p)
    # random forest baseline
    rf = RandomForestRegressor(n_estimators=2500)
    rf.fit(X, y)
    rf_mse = mse(rf)
    res = np.empty([4, 10])
    nT = np.empty([4, 10])
    # GBM with and without subsampling, over a grid of ensemble sizes
    for i in range(10):
        gbm1 = GradientBoostingRegressor(n_estimators=250*(1+i), learning_rate=0.01, subsample=1)
        gbm1.fit(X, y)
        gbm2 = GradientBoostingRegressor(n_estimators=250*(1+i), learning_rate=0.01, subsample=0.1)
        gbm2.fit(X, y)
        res[0, i] = mse(gbm1)
        res[1, i] = mse(gbm2)
        nT[0, i] = 250 * (1 + i)
        nT[1, i] = 250 * (1 + i)
    # ISLE post-processing over a grid of lasso penalties
    lams = [1, 0.1, 0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005, 0.00001, 0.000001]
    for i in range(10):
        bs1, lasso1, nT1 = ISLE(X, y, model = "GB", M = 2500, lam = lams[i])
        bs2, lasso2, nT2 = ISLE(X, y, model = "RF", M = 2500, lam = lams[i])
        nT[2, i] = nT1
        nT[3, i] = nT2
        res[2, i] = mse_isle(bs1, lasso1)
        res[3, i] = mse_isle(bs2, lasso2)

    np.savetxt("res.txt", res)
    plt.figure(figsize=(16, 14))
    labels = ['GBM (1, 0.01)', 'GBM (0.1, 0.01)', 'ISLE GB', 'ISLE RF']
    for x_arr, y_arr, label in zip(nT, res, labels):
        plt.plot(x_arr, y_arr, label = label)
    plt.axhline(y=rf_mse, linestyle = '--', label = "Random Forest")
    plt.xlabel("Number of Trees")
    plt.ylabel("Mean Squared Error")
    plt.legend()
    plt.savefig('res.png')
    plt.show()
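A minimal usage sketch of the ISLE routine defined above on a smaller problem; it assumes the functions genXY, ISLE and mse_isle from this file are available, and the parameter values are illustrative rather than taken from the experiment:

X, y = genXY(200, 100)
bs, lasso, nT = ISLE(X, y, eta=0.5, nu=0.1, M=100, model="RF", lam=0.01)
print("trees kept by the lasso:", nT)      # number of nonzero lasso coefficients
print("test MSE:", mse_isle(bs, lasso))    # evaluated on a fresh sample from genXY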
@@ -0,0 +1,76 @@
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Lasso
import numpy as np
from matplotlib import pyplot as plt


def genXY(N, p):
    # simulated regression data: nonlinear signal in the first 5 features,
    # linear signal in features 5..34, plus Gaussian noise
    X = np.random.rand(N*p).reshape(N, p)
    y = 10 * np.exp(-2*np.sum(X[:,0:5]**2, axis = 1)) + np.sum(X[:,5:35], axis = 1) + np.random.normal(0, 1.3, N)
    return X, y


def mse(regr):
    # test MSE of a fitted regressor on a fresh sample
    X_test, y_test = genXY(500, 100)
    y_pred = regr.predict(X_test)
    return np.mean((y_pred - y_test)**2)


def ISLE(X, y, eta = 0.5, nu = 0.1, M = 10, model = "GB", n_est = 500):
    N = len(X)
    # step 1: induce M base learners, each fitted to the current residuals
    # on a random subsample of size eta*N
    f_hat = np.repeat(np.mean(y), N)
    bs = [None] * M
    for m in range(M):
        Sm_idx = np.random.choice(N, int(eta*N), replace = False)
        if model == "GB":
            bs[m] = GradientBoostingRegressor(n_estimators=n_est, learning_rate=0.01)
        else:
            bs[m] = RandomForestRegressor(n_estimators=n_est)
        bs[m].fit(X[Sm_idx], y[Sm_idx] - f_hat[Sm_idx])
        f_hat = f_hat + nu * bs[m].predict(X)
    # step 2: post-process with a lasso on the base-learner predictions
    lasso = Lasso(alpha = 0.1)
    Tx = np.empty([N, M])
    for i in range(M):
        Tx[:, i] = bs[i].predict(X)
    lasso.fit(Tx, y)
    return bs, lasso


def mse_isle(bs, lasso):
    # test MSE of the lasso post-processed ensemble on a fresh sample
    M = len(bs)
    X_test, y_test = genXY(500, 100)
    Tx = np.empty([len(X_test), M])
    for i in range(M):
        Tx[:, i] = bs[i].predict(X_test)
    y_pred = lasso.predict(Tx)
    return np.mean((y_pred - y_test)**2)


if __name__ == '__main__':
    N = 1000
    p = 100
    X, y = genXY(N, p)
    # random forest baseline
    rf = RandomForestRegressor(n_estimators=2500)
    rf.fit(X, y)
    rf_mse = mse(rf)
    res = np.empty([4, 5])
    for i in range(5):
        gbm1 = GradientBoostingRegressor(n_estimators=500*(1+i), learning_rate=0.01, subsample=1)
        gbm1.fit(X, y)
        gbm2 = GradientBoostingRegressor(n_estimators=500*(1+i), learning_rate=0.01, subsample=0.1)
        gbm2.fit(X, y)
        bs1, lasso1 = ISLE(X, y, model = "GB", n_est = 500*(1+i))
        bs2, lasso2 = ISLE(X, y, model = "RF", n_est = 500*(1+i))
        res[0, i] = mse(gbm1)
        res[1, i] = mse(gbm2)
        res[2, i] = mse_isle(bs1, lasso1)
        res[3, i] = mse_isle(bs2, lasso2)
    np.savetxt("res.txt", res)
    labels = ['GBM (1, 0.01)', 'GBM (0.1, 0.01)', 'ISLE GB', 'ISLE RF']
    for y_arr, label in zip(res, labels):
        plt.plot(500*(np.arange(5) + 1), y_arr, label = label)
    plt.axhline(y=rf_mse, linestyle = '--', label = "Random Forest")
    plt.xlabel("Number of Trees")
    plt.ylabel("Mean Squared Error")
    plt.legend()
    plt.show()