@@ -1190,6 +1190,21 @@ def mode(self):
1190
1190
than 'maxr', 'backward' for backward selection.
1191
1191
1192
1192
Type: ``Literal["allsubsets", "maxr", "maxrsweep", "backward"]``, defaults to ``"maxr"``.
1193
+
1194
+ :examples:
1195
+
1196
+ >>> import h2o
1197
+ >>> from h2o.estimators import H2OModelSelectionEstimator
1198
+ >>> h2o.init()
1199
+ >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
1200
+ >>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
1201
+ >>> response = "GLEASON"
1202
+ >>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
1203
+ ... seed=12345,
1204
+ ... mode="maxr")
1205
+ >>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
1206
+ >>> results = maxrModel.result()
1207
+ >>> print(results)
1193
1208
"""
1194
1209
return self ._parms .get ("mode" )
1195
1210
@@ -1207,6 +1222,26 @@ def build_glm_model(self):
1207
1222
themselves. Defaults to false.
1208
1223
1209
1224
Type: ``bool``, defaults to ``False``.
1225
+
1226
+ :examples:
1227
+
1228
+ >>> import h2o
1229
+ >>> from h2o.estimators import H2OModelSelectionEstimator
1230
+ >>> h2o.init()
1231
+ >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
1232
+ >>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
1233
+ >>> response = "GLEASON"
1234
+ >>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
1235
+ ... seed=12345,
1236
+ ... mode="maxrsweep",
1237
+ ... build_glm_model=True)
1238
+ >>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
1239
+ >>> result = maxrModel.result()
1240
+ >>> # get the GLM model with the best performance for a fixed predictor size:
1241
+ >>> one_model = h2o.get_model(result["model_id"][1, 0])
1242
+ >>> predict = one_model.predict(prostate)
1243
+ >>> # print a version of the predict frame:
1244
+ >>> print(predict)
1210
1245
"""
1211
1246
return self ._parms .get ("build_glm_model" )
1212
1247
@@ -1222,6 +1257,22 @@ def p_values_threshold(self):
1222
1257
below this threshold
1223
1258
1224
1259
Type: ``float``, defaults to ``0.0``.
1260
+
1261
+ :examples:
1262
+
1263
+ >>> import h2o
1264
+ >>> from h2o.estimators import H2OModelSelectionEstimator
1265
+ >>> h2o.init()
1266
+ >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
1267
+ >>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
1268
+ >>> response = "GLEASON"
1269
+ >>> backwardModel = H2OModelSelectionEstimator(min_predictor_number=2,
1270
+ ... seed=12345,
1271
+ ... mode="backward",
1272
+ ... p_values_threshold=0.001)
1273
+ >>> backwardModel.train(x=predictors, y=response, training_frame=prostate)
1274
+ >>> result = backwardModel.result()
1275
+ >>> print(result)
1225
1276
"""
1226
1277
return self ._parms .get ("p_values_threshold" )
1227
1278
@@ -1236,6 +1287,22 @@ def influence(self):
1236
1287
If set to dfbetas, will calculate the difference in beta when a data row is included in versus excluded from the dataset.
1237
1288
1238
1289
Type: ``Literal["dfbetas"]``.
1290
+
1291
+ :examples:
1292
+
1293
+ >>> import h2o
1294
+ >>> from h2o.estimators import H2OModelSelectionEstimator
1295
+ >>> h2o.init()
1296
+ >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
1297
+ >>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
1298
+ >>> response = "GLEASON"
1299
+ >>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
1300
+ ... seed=12345,
1301
+ ... mode="maxr",
1302
+ ... influence="dfbetas")
1303
+ >>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
1304
+ >>> glm_rid = maxrModel.get_regression_influence_diagnostics()
1305
+ >>> print(glm_rid)
1239
1306
"""
1240
1307
return self ._parms .get ("influence" )
1241
1308
@@ -1303,6 +1370,23 @@ def coef_norm(self, predictor_size=None):
1303
1370
1304
1371
:param predictor_size: predictor subset size, will only return model coefficients of that subset size.
1305
1372
:return: list of Python Dicts of coefficients for all models built with different predictor numbers
1373
+
1374
+ :examples:
1375
+
1376
+ >>> import h2o
1377
+ >>> from h2o.estimators import H2OModelSelectionEstimator
1378
+ >>> h2o.init()
1379
+ >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
1380
+ >>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
1381
+ >>> response = "GLEASON"
1382
+ >>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
1383
+ ... seed=12345,
1384
+ ... mode="maxr")
1385
+ >>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
1386
+ >>> coeff_norm = maxrModel.coef_norm()
1387
+ >>> print(coeff_norm)
1388
+ >>> coeff_norm_3 = maxrModel.coef_norm(predictor_size=3) # print coefficient norm with 3 predictors
1389
+ >>> print(coeff_norm_3)
1306
1390
"""
1307
1391
model_ids = self ._model_json ["output" ]["best_model_ids" ]
1308
1392
if not (self .actual_params ["build_glm_model" ]) and self .actual_params ["mode" ]== "maxrsweep" :
@@ -1356,6 +1440,23 @@ def coef(self, predictor_size=None):
1356
1440
1357
1441
:param predictor_size: predictor subset size, will only return model coefficients of that subset size.
1358
1442
:return: list of Python Dicts of coefficients for all models built with different predictor numbers
1443
+
1444
+ :examples:
1445
+
1446
+ >>> import h2o
1447
+ >>> from h2o.estimators import H2OModelSelectionEstimator
1448
+ >>> h2o.init()
1449
+ >>> prostate = h2o.import_file("http://s3.amazonaws.com/h2o-public-test-data/smalldata/logreg/prostate.csv")
1450
+ >>> predictors = ["AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]
1451
+ >>> response = "GLEASON"
1452
+ >>> maxrModel = H2OModelSelectionEstimator(max_predictor_number=5,
1453
+ ... seed=12345,
1454
+ ... mode="maxr")
1455
+ >>> maxrModel.train(x=predictors, y=response, training_frame=prostate)
1456
+ >>> coeff = maxrModel.coef()
1457
+ >>> print(coeff)
1458
+ >>> coeff_3 = maxrModel.coef(predictor_size=3)
1459
+ >>> print(coeff_3)
1359
1460
"""
1360
1461
if not self .actual_params ["build_glm_model" ] and self .actual_params ["mode" ]== "maxrsweep" :
1361
1462
coef_names = self ._model_json ["output" ]["coefficient_names" ]
@@ -1409,6 +1510,7 @@ def coef(self, predictor_size=None):
1409
1510
def result (self ):
1410
1511
"""
1411
1512
Get the result frame that contains information about the model building process, as produced for modelselection and anovaglm.
1513
+
1412
1514
:return: the H2OFrame that contains information about the model building process, as produced for modelselection and anovaglm.
1413
1515
"""
1414
1516
return H2OFrame ._expr (expr = ExprNode ("result" , ASTId (self .key )))._frame (fill_cache = True )
0 commit comments