|
1 | 1 | #GLM2 bench
|
2 |
| -import os, sys, time, csv |
| 2 | +import os, sys, time, csv, re, requests |
3 | 3 | sys.path.append('../py/')
|
4 | 4 | sys.path.extend(['.','..'])
|
5 | 5 | import h2o_cmd, h2o, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_rf, h2o_jobs
|
@@ -108,15 +108,27 @@ def doGLM2(f, folderPath, family, lambda_, alpha, nfolds, y, x, testFilehex, row
|
108 | 108 | row.update( {'AIC' : glmView['glm_model']['validation']['aic'],
|
109 | 109 | 'nIterations' : glmView['glm_model']['iteration'],
|
110 | 110 | 'nPredictors' : len(glmView['glm_model']['beta']),
|
111 |
| - 'AverageError' : glmView['glm_model']['validation']['avg_err'], |
| 111 | + #'AverageError' : glmView['glm_model']['validation']['avg_err'], |
112 | 112 | })
|
113 | 113 | if family == "binomial":
|
| 114 | + #Scrape html of 2/glmmodelview to get best threshold, |
| 115 | + #then, multiply by 100 and cast to int... |
| 116 | + #then ask for the corresponding CM from _cms inside glmView |
| 117 | + url = 'http://%s:%d/2/GLMModelView.html?_modelKey=%s' % (h2o.nodes[0].http_addr, 55555, 'GLM('+f+')') |
| 118 | + r = requests.get(url).text |
| 119 | + p1 = re.compile('threshold[:<>/a-z]*[0-9]\.[0-9]*') |
| 120 | + p2 = re.compile('[0-9]\.[0-9]*') |
| 121 | + best = int(float(p2.search(p1.search(text).group()).group()) * 100) |
| 122 | + best_cm = glmView['glm_model']['validation']['_cms'][best]['_arr'] |
| 123 | + avg_err = (best_cm[0][1] + best_cm[1][0]) / (sum([i for sublist in best_cm for i in sublist])) |
114 | 124 | row.update( {#'scoreTime' : scoreTime,
|
115 | 125 | 'AUC' : glmView['glm_model']['validation']['auc'],
|
| 126 | + 'AverageError' : avg_err, |
116 | 127 | })
|
117 | 128 | else:
|
118 | 129 | row.update( {#'scoreTime' : scoreTime,
|
119 | 130 | 'AUC' : 'NA',
|
| 131 | + 'AverageError' : glmView['glm_model']['validation']['avg_err'], |
120 | 132 | })
|
121 | 133 | csvWrt.writerow(row)
|
122 | 134 | finally:
|
|
0 commit comments