This repository has been archived by the owner on Jul 18, 2018. It is now read-only.

Commit

bug fixes
Tong Zhang committed Dec 4, 2016
1 parent 9735020 commit 516f7e7
Showing 5 changed files with 11 additions and 11 deletions.
4 changes: 2 additions & 2 deletions CHANGES
@@ -2,6 +2,6 @@
0.2 (Aug 2016)
This is the first release. It only supports binary classification and regression, with significant simplifications from the original RGF algorithm for speed consideration. Additional functionalities will be supported in future releases.

+0.3 (Dec 2016)
+Fixed several bugs that affect prediction performance (especially for small datasets).



6 changes: 3 additions & 3 deletions README.md
@@ -2,14 +2,14 @@
# FastRGF
### Multi-core implementation of Regularized Greedy Forest [RGF]

-### Version 0.2 (August 2016) by Tong Zhang
+### Version 0.3 (Dec 2016) by Tong Zhang
---------
#### 1. Introduction

This software package provides a multi-core implementation of a simplified Regularized Greedy Forest (RGF) described in **[RGF]**. Please cite the paper if you find the software useful.

-RGF is a machine learning method for building decision forests that have been used to win some kaggle competitions. In our experience it works better than *gradient boosting* on many relatively large data.
+RGF is a machine learning method for building decision forests that have been used to win some kaggle competitions. In our experience it works better than *gradient boosting* on many relatively large datasets.

The implementation employs the following concepts described in the **[RGF]** paper:

@@ -53,7 +53,7 @@ The software is written in c++11, and it has been tested under linux and macos,
You may use the option -h to show command-line options (options can also be provided in a configuration file).

#### 3. Examples
-Go to the subdirectory examples/, and following the instructions in [README.md](examples/README.md) (it also contains some tips for parameter tuning).
+Go to the subdirectory examples/, and following the instructions in [README.md](examples/README.md). The file also contains some tips for parameter tuning.

#### 4. Contact
Tong Zhang
6 changes: 3 additions & 3 deletions src/base/discretization.cpp
@@ -46,7 +46,7 @@ namespace _discretizationTrainerDense
if (min_bucket_weights<1) min_bucket_weights=1.0;

for (size_t my_cut = begin; my_cut < end; my_cut++) {
-if (my_cut<end-1 && s_arr[my_cut].x >=s_arr[my_cut+1].x) {
+if (s_arr[my_cut].x >=s_arr[my_cut+1].x) {
assert(s_arr[my_cut].x==s_arr[my_cut+1].x);
continue;
}
@@ -126,7 +126,7 @@ float _discretizationTrainerDense::train
while (nbuckets <max_buckets && qu.size()>0) {
Bucket b= qu.top();
qu.pop();
-if (b.cut>=n-2 || b.gain <=0) continue;
+if (b.cut>=n-2 || b.gain <=0 || (s[b.cut].x>=s[b.cut+1].x)) continue;
tot_gain+=b.gain;
b_vec.push_back(0.5*(s[b.cut].x+s[b.cut+1].x));
nbuckets++;
@@ -336,7 +336,7 @@ void FeatureDiscretizationSparse<feat_t,id_t,disc_t>::train


size_t nf;
-for (nf=min<size_t>(id_counts.size()-1,tr.max_features.value); nf >0; nf--) {
+for (nf=min<size_t>(id_counts.size(),tr.max_features.value); nf >0; nf--) {
if (gain[nf-1].value>0) break;
}
boundary_arr.reset(nf);
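The discretization.cpp hunks above tighten the same invariant: a discretization threshold can only sit strictly between two distinct sorted feature values, so candidate cuts between equal adjacent values must be skipped. A minimal standalone sketch of that invariant, with illustrative names that are not the library's:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Candidate cut positions i such that a threshold can be placed strictly
// between v[i] and v[i+1]; ties (equal adjacent values) are skipped,
// because no threshold separates two equal feature values.
std::vector<std::size_t> valid_cuts(const std::vector<float>& v) {
    std::vector<std::size_t> cuts;
    for (std::size_t i = 0; i + 1 < v.size(); ++i) {
        if (v[i] >= v[i + 1]) {
            assert(v[i] == v[i + 1]);  // input must be sorted ascending
            continue;                  // tie: not a valid cut position
        }
        cuts.push_back(i);
    }
    return cuts;
}
```

For a sorted column such as `{1, 1, 2, 3, 3}`, only the positions between the 1/2 and 2/3 boundaries survive; before the fix, a tie at the end of a range could slip through as a degenerate cut.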
2 changes: 1 addition & 1 deletion src/forest/feature_mapper.h
@@ -149,7 +149,7 @@ namespace _decisionTreeTrainer
{

int_t psize=parent.size;
-if (copy_from_parent || (data_size >=psize/2) ) {
+if (copy_from_parent || (data_size >=psize/2)) {
my_storage=nullptr;
size=parent.size;
fv_map_ptr= parent.fv_map_ptr;
4 changes: 2 additions & 2 deletions src/forest/node_trainer.h
@@ -286,9 +286,9 @@ namespace _decisionTreeTrainer
cut_orig=-1;
if (gain>=0 && feature >=0 && cut >=0) {
if (feature<featmap_dense.size()) {
-cut_orig=featmap_dense[feature].get(cut);
+  cut_orig=featmap_dense[feature].get(cut);
}
-cut_orig=cut;
+else cut_orig=cut;
}
}

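The node_trainer.h hunk above fixes a missing `else`: without it, the unconditional `cut_orig=cut;` always ran and clobbered the value just fetched from `featmap_dense`. A reduced sketch of the corrected control flow, with illustrative names that are not the library's:

```cpp
#include <cassert>

// With the `else`, the raw cut is used only when no dense feature mapping
// applies; before the fix, the second assignment ran unconditionally and
// overwrote the mapped value.
int map_cut(bool has_dense_mapping, int mapped_cut, int raw_cut) {
    int cut_orig;
    if (has_dense_mapping) {
        cut_orig = mapped_cut;  // translate the discretized cut back
    } else {
        cut_orig = raw_cut;     // no mapping: keep the cut as-is
    }
    return cut_orig;
}
```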
