diff --git a/py/testdir_single_jvm/binquant.py b/py/testdir_single_jvm/binquant.py index 037077783b..b486290bf8 100644 --- a/py/testdir_single_jvm/binquant.py +++ b/py/testdir_single_jvm/binquant.py @@ -6,6 +6,7 @@ import math OTHER_T = 0.5 BIN_COUNT = 20 +BIN_COUNT = 1 # Defintion (this defn. seems odd. for the case of real quantiles, it should be a floor, not a round up?) # This definition may be correct for 1-based indexing. (we do zero-based indexing in the code below, so it looks different) @@ -190,10 +191,10 @@ def htot2(): guess = (hcnt2_max[k] - hcnt2_min[k]) / 2 if currentCnt==targetCntInt: - if hcnt2[k]>2: + if hcnt2[k]>2 and (hcnt2_min[k]==hcnt2_max[k]): guess = hcnt2_min[k] done = True - print "Guess A", guess + print "Guess A", guess, k, hcnt2[k] if hcnt2[k]==2: # no mattter what size the fraction it would be on this number diff --git a/py/testdir_single_jvm/test_summary2_unifiles2.py b/py/testdir_single_jvm/test_summary2_unifiles2.py index 3947f271bc..49a8b63224 100644 --- a/py/testdir_single_jvm/test_summary2_unifiles2.py +++ b/py/testdir_single_jvm/test_summary2_unifiles2.py @@ -10,6 +10,7 @@ DO_MEDIAN = True MAX_QBINS = 1000 +MAX_QBINS = 2 def twoDecimals(l): if isinstance(l, list): @@ -82,9 +83,14 @@ def generate_scipy_comparison(csvPathname, col=0, h2oMedian=None, h2oMedian2=Non b = h2o_summ.percentileOnSortedList(targetFP, 0.50 if DO_MEDIAN else 0.999) label = '50%' if DO_MEDIAN else '99.9%' h2p.blue_print(label, "from sort:", b) - h2p.blue_print(label, "from scipy:", a[5 if DO_MEDIAN else 10]) + s = a[5 if DO_MEDIAN else 10] + h2p.blue_print(label, "from scipy:", s) h2p.blue_print(label, "from h2o singlepass:", h2oMedian) h2p.blue_print(label, "from h2o multipass:", h2oMedian2) + # they should be identical. keep a tight absolute tolerance + h2o_util.assertApproxEqual(h2oMedian2, b, tol=0.0000002, msg='h2o quantile multipass is not approx. same as sort algo') + h2o_util.assertApproxEqual(h2oMedian2, s, tol=0.0000002, msg='h2o quantile multipass is not approx. same as scipy algo') + # see if scipy changes. nope. it doesn't if 1==0: a = stats.mstats.mquantiles(targetFP, prob=per) diff --git a/src/main/java/hex/Quantiles.java b/src/main/java/hex/Quantiles.java index 9662c4725d..0248f8a2e4 100644 --- a/src/main/java/hex/Quantiles.java +++ b/src/main/java/hex/Quantiles.java @@ -404,7 +404,7 @@ private boolean exactQuantilesMultiPass(double[] qtiles, double[] thres, long de boolean interpolated = false; if ( currentCnt==targetCntInt ) { - if ( hcnt2[k]>2 ) { + if ( hcnt2[k]>2 && (hcnt2_min[k]==hcnt2_max[k]) ) { guess = hcnt2_min[k]; done = true; Log.info("Q_ Guess A "+guess);