Added hack to get the best-threshold confusion matrix (CM): scrape the threshold from the GLMModelView HTML

spennihana · spennihana · commit 06e5ad3de86b · 2013-10-22T14:59:15.000-07:00
diff --git a/bench/BMscripts/glm2Bench.py b/bench/BMscripts/glm2Bench.py
@@ -1,5 +1,5 @@
 #GLM2 bench
-import os, sys, time, csv
+import os, sys, time, csv, re, requests
 sys.path.append('../py/')
 sys.path.extend(['.','..'])
 import h2o_cmd, h2o, h2o_hosts, h2o_browse as h2b, h2o_import as h2i, h2o_rf, h2o_jobs
@@ -108,15 +108,27 @@ def doGLM2(f, folderPath, family, lambda_, alpha, nfolds, y, x, testFilehex, row
         row.update( {'AIC'          : glmView['glm_model']['validation']['aic'],
                      'nIterations'  : glmView['glm_model']['iteration'],
                      'nPredictors'  : len(glmView['glm_model']['beta']),
-                     'AverageError' : glmView['glm_model']['validation']['avg_err'],
+                     #'AverageError' : glmView['glm_model']['validation']['avg_err'],
                     })
         if family == "binomial":
+            #Scrape the HTML of 2/GLMModelView to get the best threshold,
+            #then multiply it by 100 and cast to int...
+            #then ask for the corresponding CM from _cms inside glmView
+            url     = 'http://%s:%d/2/GLMModelView.html?_modelKey=%s' % (h2o.nodes[0].http_addr, 55555, 'GLM('+f+')')
+            r       = requests.get(url).text
+            p1      = re.compile('threshold[:<>/a-z]*[0-9]\.[0-9]*')
+            p2      = re.compile('[0-9]\.[0-9]*')
+            best    = int(float(p2.search(p1.search(text).group()).group()) * 100)
+            best_cm = glmView['glm_model']['validation']['_cms'][best]['_arr']
+            avg_err = (best_cm[0][1] + best_cm[1][0]) / (sum([i for sublist in best_cm for i in sublist]))
             row.update( {#'scoreTime'          : scoreTime,
                          'AUC'                : glmView['glm_model']['validation']['auc'],
+                         'AverageError'       : avg_err,
                         })
         else:
             row.update( {#'scoreTime'          : scoreTime,
                          'AUC'                : 'NA',
+                         'AverageError'       : glmView['glm_model']['validation']['avg_err'],
                         })
         csvWrt.writerow(row)
     finally: