From a673f7a9197808cace791f9a9ce7ec0d8d96cf7f Mon Sep 17 00:00:00 2001 From: vishrutg Date: Wed, 14 Oct 2015 12:55:54 -0700 Subject: [PATCH] fixing corner cases of reduced memory prediction for hpdRF_paralleltree --- algorithms/HPdclassifier/R/hpdRF_parallelTree.R | 7 +------ algorithms/HPdclassifier/R/hpdRFdistributed.R | 13 ++++++++++--- algorithms/HPdclassifier/src/hpdRFpredict.cpp | 6 +----- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/algorithms/HPdclassifier/R/hpdRF_parallelTree.R b/algorithms/HPdclassifier/R/hpdRF_parallelTree.R index b99b5574..6fb9ca4b 100644 --- a/algorithms/HPdclassifier/R/hpdRF_parallelTree.R +++ b/algorithms/HPdclassifier/R/hpdRF_parallelTree.R @@ -365,18 +365,13 @@ hpdrandomForest <- hpdRF_parallelTree <- function(formula, data, oob_predictions = NULL if(completeModel) { - tryCatch({ if(do.trace) .master_output("\tComputing oob statistics") timing_info <- Sys.time() oob_predictions = .predictOOB(forest, observations, responses, oob_indices, cutoff, classes, - reduceModel = reduceModel,do.trace) + ntree = ntree, reduceModel = reduceModel, do.trace) forest = oob_predictions$dforest - },error = function(e) - { - stop(paste("aborting oob computations. received error:", e)) - }) if(do.trace) .master_output("\tcurrent distributed forest size: ", format(round(d.object.size(forest)/1024/1024,2),nsmall = 2), diff --git a/algorithms/HPdclassifier/R/hpdRFdistributed.R b/algorithms/HPdclassifier/R/hpdRFdistributed.R index 937d30f7..325a4ee6 100644 --- a/algorithms/HPdclassifier/R/hpdRFdistributed.R +++ b/algorithms/HPdclassifier/R/hpdRFdistributed.R @@ -555,7 +555,7 @@ .predictOOB <- function(forest, observations, responses, oob_indices, - cutoff, classes, reduceModel = FALSE, trace = FALSE) + cutoff, classes, ntree, reduceModel = FALSE, trace = FALSE) { timing_info <- Sys.time() @@ -571,8 +571,12 @@ dforest = forest + if(ntree < npartitions(forest)) + dforest = .redistributeForest(forest,as.list(1:ntree)) + rm(forest) suppressWarnings({ - votes = darray(npartitions =c(npartitions(dforest),npartitions(observations))) + votes = darray(npartitions =c(npartitions(dforest), + npartitions(observations))) }) foreach(i,0:(npartitions(votes)-1), function(predictions=splits(votes,i+1), @@ -585,6 +589,7 @@ { tree_ids = which(!sapply(forest,is.null)) tree_ids = tree_ids[-1] + forest = .Call("unserializeForest",forest, PACKAGE = "HPdclassifier") forestparam = .Call("getForestParameters", forest, @@ -612,9 +617,10 @@ predictions = matrix(as.double(NA), ncol = nrow(observations), length(tree_ids)) - + tree_ids = tree_ids - 1 oob_indices = lapply(oob_indices, as.integer) + .Call("forestPredictOOB", forest, predictions, observations, oob_indices, as.integer(tree_ids)) @@ -622,6 +628,7 @@ update(predictions) },progress = FALSE) + timing_info <- Sys.time() - timing_info if(trace) .master_output(format(round(timing_info, 2),nsmall = 2)) diff --git a/algorithms/HPdclassifier/src/hpdRFpredict.cpp b/algorithms/HPdclassifier/src/hpdRFpredict.cpp index 0b33d485..35e593b2 100644 --- a/algorithms/HPdclassifier/src/hpdRFpredict.cpp +++ b/algorithms/HPdclassifier/src/hpdRFpredict.cpp @@ -499,11 +499,7 @@ extern "C" for(int tree_index = 0; tree_index < length(R_tree_ids); tree_index++) { int tree_id = INTEGER(R_tree_ids)[tree_index]-1; - SEXP indices; - if(length(R_indices) > 1) - indices = VECTOR_ELT(R_indices,tree_id); - else - indices = R_indices; + SEXP indices = VECTOR_ELT(R_indices,tree_id); for(int i = 0; i < length(indices); i++) { int obs_index = INTEGER(indices)[i]-1;