Skip to content

Commit

Permalink
Add support for case weights to Cox PH modeling.
Browse files Browse the repository at this point in the history
  • Loading branch information
aboyoun committed Oct 21, 2014
1 parent c52aa85 commit ca0877a
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 85 deletions.
38 changes: 24 additions & 14 deletions R/h2o-package/R/Algorithms.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ h2o.coxph.control <- function(lre = 9, iter.max = 20, ...)

list(lre = lre, iter.max = as.integer(iter.max))
}
h2o.coxph <- function(x, y, data, key = "", ties = c("efron", "breslow"),
h2o.coxph <- function(x, y, data, key = "", weights, ties = c("efron", "breslow"),
init = 0, control = h2o.coxph.control(...), ...)
{
if (!is(data, "H2OParsedData"))
Expand All @@ -38,6 +38,13 @@ h2o.coxph <- function(x, y, data, key = "", ties = c("efron", "breslow"),
stop("'y' must be a character vector of column names from 'data' ",
"specifying a (start, stop, event) triplet or (stop, event) couplet")

useWeights <- !missing(weights)
if (useWeights) {
if (!is.character(weights) || length(weights) != 1L || !(weights %in% cnames))
stop("'weights' must be missing or a character string specifying a column name from 'data'")
} else
weights <- y[1L]

if (!is.character(key) && length(key) == 1L)
stop("'key' must be a character string")
if (nchar(key) > 0 && !grepl("^[a-zA-Z_][a-zA-Z0-9_.]*$", key))
Expand All @@ -51,24 +58,25 @@ h2o.coxph <- function(x, y, data, key = "", ties = c("efron", "breslow"),
stop("'init' must be a numeric vector containing finite coefficient starting values")

job <- .h2o.__remoteSend(data@h2o, .h2o.__PAGE_CoxPH,
destination_key = key,
source = data@key,
use_start_column = as.integer(ny == 3L),
start_column = y[1L],
stop_column = y[ny - 1L],
event_column = y[ny],
x_columns = match(x, cnames) - 1L,
ties = ties,
init = init,
lre_min = control$lre,
iter_max = control$iter.max)
destination_key = key,
source = data@key,
use_start_column = as.integer(ny == 3L),
start_column = y[1L],
stop_column = y[ny - 1L],
event_column = y[ny],
x_columns = match(x, cnames) - 1L,
use_weights_column = as.integer(useWeights),
weights_column = weights,
ties = ties,
init = init,
lre_min = control$lre,
iter_max = control$iter.max)
job_key <- job$job_key
dest_key <- job$destination_key
.h2o.__waitOnJob(data@h2o, job_key)
res <- .h2o.__remoteSend(data@h2o, .h2o.__PAGE_CoxPHModelView,
'_modelKey' = dest_key)
df <- length(res[[3]]$coef)
nnum <- length(res[[3L]]$x_mean)
coef_names <- res[[3L]]$coef_names
mcall <- match.call()
model <-
Expand All @@ -77,7 +85,9 @@ h2o.coxph <- function(x, y, data, key = "", ties = c("efron", "breslow"),
loglik = c(res[[3L]]$null_loglik, res[[3L]]$loglik),
score = res[[3L]]$score_test,
iter = res[[3L]]$iter,
means = structure(res[[3L]]$x_mean, names = tail(coef_names, nnum)),
means = structure(c(unlist(res[[3L]]$x_mean_cat),
unlist(res[[3L]]$x_mean_num)),
names = coef_names),
method = ties,
n = res[[3L]]$n,
nevent = res[[3L]]$total_event,
Expand Down
4 changes: 3 additions & 1 deletion R/h2o-package/man/h2o.coxph.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ H2O: Cox Proportional Hazards Models
Fit a Cox Proportional Hazards Model.
}
\usage{
h2o.coxph(x, y, data, key = "", ties = c("efron", "breslow"),
h2o.coxph(x, y, data, key = "", weights, ties = c("efron", "breslow"),
init = 0, control = h2o.coxph.control(...), ...)

h2o.coxph.control(lre = 9, iter.max = 20, ...)
Expand All @@ -44,6 +44,8 @@ h2o.coxph.control(lre = 9, iter.max = 20, ...)
variables in the model.}
\item{key}{An optional unique hex key assigned to the resulting model.
If none is given, a key will automatically be generated.}
\item{weights}{An optional character string representing the case weights in
the model.}
\item{ties}{A character string denoting which approximation method for
handling ties should be used in the partial likelihood;
one of either \code{"efron"} or \code{"breslow"}.}
Expand Down
8 changes: 5 additions & 3 deletions R/tests/Utils/coxphR.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ checkCoxPHModel <- function(myCoxPH.h2o, myCoxPH.r, tolerance = 1e-8, ...) {
checkEquals(myCoxPH.r$score, myCoxPH.h2o@model$score,
tolerance = tolerance)
checkTrue ( myCoxPH.h2o@model$iter >= 1L)
checkEquals(tail(myCoxPH.r$means, length(myCoxPH.h2o@model$means)),
myCoxPH.h2o@model$means,
tolerance = tolerance)
if (myCoxPH.h2o@survfit$type == "counting")
myCoxPH.r$means[] <- myCoxPH.h2o@model$means # survival::coxph generates unweighted means when a start time is supplied
else
checkEquals(myCoxPH.r$means, myCoxPH.h2o@model$means,
tolerance = tolerance, check.attributes = FALSE)
checkEquals(myCoxPH.r$method, myCoxPH.h2o@model$method)
checkEquals(myCoxPH.r$n, myCoxPH.h2o@model$n)
checkEquals(myCoxPH.r$nevent, myCoxPH.h2o@model$nevent)
Expand Down
25 changes: 24 additions & 1 deletion R/tests/testdir_algos/coxph/runit_CoxPH_bladder.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,27 @@ test.CoxPH.bladder <- function(conn) {
bladder$enum <- as.factor(bladder$enum)
bladder.h2o <- as.h2o(conn, bladder, key = "bladder.h2o")

Log.info("H2O Cox PH Model of bladder Data Set using Efron's Approximation; 1 predictor\n")
bladder.coxph.h2o <-
h2o.coxph(x = "size", y = c("stop", "event"), data = bladder.h2o,
key = "bladmod.h2o")
bladder.coxph <- coxph(Surv(stop, event) ~ size, data = bladder)
checkCoxPHModel(bladder.coxph.h2o, bladder.coxph)

Log.info("H2O Cox PH Model of bladder Data Set using Efron's Approximation; 4 predictors\n")
bladder.coxph.h2o <-
h2o.coxph(x = c("enum", "rx", "number", "size"), y = c("stop", "event"), data = bladder.h2o,
key = "bladmod.h2o")
bladder.coxph <- coxph(Surv(stop, event) ~ enum + rx + number + size, data = bladder)
checkCoxPHModel(bladder.coxph.h2o, bladder.coxph)
checkCoxPHModel(bladder.coxph.h2o, bladder.coxph)

Log.info("H2O Cox PH Model of bladder Data Set using Efron's Approximation; 4 predictors and case weights\n")
bladder.coxph.h2o <-
h2o.coxph(x = c("enum", "rx", "number", "size"), y = c("stop", "event"), data = bladder.h2o,
key = "bladmod.h2o", weights = "id")
bladder.coxph <- coxph(Surv(stop, event) ~ enum + rx + number + size, data = bladder, weights = bladder$id)
checkCoxPHModel(bladder.coxph.h2o, bladder.coxph)

Log.info("H2O Cox PH Model of bladder Data Set using Efron's Approximation; init = 0.2\n")
bladder.coxph.h2o <-
Expand All @@ -36,14 +51,22 @@ test.CoxPH.bladder <- function(conn) {
coxph(Surv(stop, event) ~ size, data = bladder, ties = "breslow")
checkCoxPHModel(bladder.coxph.h2o, bladder.coxph, tolerance = 1e-7)

Log.info("H2O Cox PH Model of bladder Data Set using Breslow's Approximation; 3 predictors\n")
Log.info("H2O Cox PH Model of bladder Data Set using Breslow's Approximation; 4 predictors\n")
bladder.coxph.h2o <-
h2o.coxph(x = c("enum", "rx", "number", "size"), y = c("stop", "event"), data = bladder.h2o,
key = "bladmod.h2o", ties = "breslow")
bladder.coxph <-
coxph(Surv(stop, event) ~ enum + rx + number + size, data = bladder, ties = "breslow")
checkCoxPHModel(bladder.coxph.h2o, bladder.coxph)

Log.info("H2O Cox PH Model of bladder Data Set using Breslow's Approximation; 4 predictors and case weights\n")
bladder.coxph.h2o <-
h2o.coxph(x = c("enum", "rx", "number", "size"), y = c("stop", "event"), data = bladder.h2o,
key = "bladmod.h2o", weights = "id", ties = "breslow")
bladder.coxph <-
coxph(Surv(stop, event) ~ enum + rx + number + size, data = bladder, weights = bladder$id, ties = "breslow")
checkCoxPHModel(bladder.coxph.h2o, bladder.coxph)

Log.info("H2O Cox PH Model of bladder Data Set using Breslow's Approximation; init = 0.2\n")
bladder.coxph.h2o <-
h2o.coxph(x = "size", y = c("stop", "event"), data = bladder.h2o,
Expand Down
18 changes: 17 additions & 1 deletion R/tests/testdir_algos/coxph/runit_CoxPH_heart.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,13 @@ test.CoxPH.heart <- function(conn) {
heart.coxph <- coxph(Surv(start, stop, event) ~ transplant + age + year + surgery, data = heart)
checkCoxPHModel(heart.coxph.h2o, heart.coxph)

Log.info("H2O Cox PH Model of heart Data Set using Efron's Approximation; 4 predictors and case weights\n")
heart.coxph.h2o <-
h2o.coxph(x = c("transplant", "age", "year", "surgery"), y = c("start", "stop", "event"), data = heart.h2o,
key = "heartmod.h2o", weights = "id")
heart.coxph <- coxph(Surv(start, stop, event) ~ transplant + age + year + surgery, data = heart, weights = heart$id)
checkCoxPHModel(heart.coxph.h2o, heart.coxph)

Log.info("H2O Cox PH Model of heart Data Set using Efron's Approximation; init = 0.05\n")
heart.coxph.h2o <-
h2o.coxph(x = "age", y = c("start", "stop", "event"), data = heart.h2o,
Expand All @@ -43,14 +50,23 @@ test.CoxPH.heart <- function(conn) {
coxph(Surv(start, stop, event) ~ age, data = heart, ties = "breslow")
checkCoxPHModel(heart.coxph.h2o, heart.coxph)

Log.info("H2O Cox PH Model of heart Data Set using Breslow's Approximation; 3 predictors\n")
Log.info("H2O Cox PH Model of heart Data Set using Breslow's Approximation; 4 predictors\n")
heart.coxph.h2o <-
h2o.coxph(x = c("transplant", "age", "year", "surgery"), y = c("start", "stop", "event"), data = heart.h2o,
key = "heartmod.h2o", ties = "breslow")
heart.coxph <-
coxph(Surv(start, stop, event) ~ transplant + age + year + surgery, data = heart, ties = "breslow")
checkCoxPHModel(heart.coxph.h2o, heart.coxph)

Log.info("H2O Cox PH Model of heart Data Set using Breslow's Approximation; 4 predictors and case weights\n")
heart.coxph.h2o <-
h2o.coxph(x = c("transplant", "age", "year", "surgery"), y = c("start", "stop", "event"), data = heart.h2o,
key = "heartmod.h2o", weights = "id", ties = "breslow")
heart.coxph <-
coxph(Surv(start, stop, event) ~ transplant + age + year + surgery, data = heart,
weights = heart$id, ties = "breslow")
checkCoxPHModel(heart.coxph.h2o, heart.coxph)

Log.info("H2O Cox PH Model of heart Data Set using Breslow's Approximation; init = 0.05\n")
heart.coxph.h2o <-
h2o.coxph(x = "age", y = c("start", "stop", "event"), data = heart.h2o,
Expand Down
Loading

0 comments on commit ca0877a

Please sign in to comment.