Skip to content

Commit

Permalink
idempotent tidy_draws() for already-tidy data frames, closes mjskay#82
Browse files Browse the repository at this point in the history
  • Loading branch information
mjskay committed Sep 27, 2019
1 parent c9ba278 commit 14b9f93
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 1 deletion.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ S3method(residual_draws,brmsfit)
S3method(residual_draws,default)
S3method(tidy_draws,MCMCglmm)
S3method(tidy_draws,brmsfit)
S3method(tidy_draws,data.frame)
S3method(tidy_draws,default)
S3method(tidy_draws,jagsUI)
S3method(tidy_draws,matrix)
Expand Down
53 changes: 53 additions & 0 deletions R/tidy_draws.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ as_sample_data_frame = function(...) {
#' of \code{\link[coda]{as.mcmc.list}} (\code{tidy_draws} should work on any model
#' with an implementation of \code{\link[coda]{as.mcmc.list}})
#'
#' \code{tidy_draws} can be applied to a data frame that is already a tidy-format data frame
#' of draws, provided it has one row per draw. In other words, it can be applied to data frames
#' that have the same format it returns, and it will return the same data frame back, while
#' checking to ensure the \code{.chain}, \code{.iteration}, and \code{.draw} columns are all
#' integers (converting if possible) and that the \code{.draw} column is unique. This allows
#' you to pass already-tidy-format data frames into other tidybayes functions, like
#' \code{\link{spread_draws}} or \code{\link{gather_draws}}.
#'
#' @param model A supported Bayesian model fit object. See \code{\link{tidybayes-models}} for a list of supported
#' models.
#' @return A data frame (actually, a \code{\link[tibble]{tibble}}) with a \code{.chain} column,
Expand Down Expand Up @@ -75,6 +83,51 @@ tidy_draws.default = function(model) {
tidy_draws(as.mcmc.list(model))
}

#' @rdname tidy_draws
#' @export
tidy_draws.data.frame = function(model) {
  # Validate (and lightly clean) a data frame that is already in the tidy
  # draws format, so that tidy_draws() is idempotent on its own output.
  #
  # Args:
  #   model: a data frame expected to contain `.chain`, `.iteration`, and
  #     `.draw` index columns, with one row per draw.
  #
  # Returns:
  #   `model`, with any logical or integer-like index columns converted to
  #   integer; all other columns are left untouched.
  #
  # Errors:
  #   - if any index column is missing or cannot be treated as integer
  #   - if the values of `.draw` are not unique (more than one row per draw)
  stop_message = paste0(
    "To use a data frame directly with `tidy_draws()`, it must already be a\n",
    " tidy-format data frame of draws: it must have integer-like `.chain`,\n",
    " `.iteration`, and `.draw` columns with one row per draw.\n",
    "\n"
  )

  # walk over the draw index columns, recording which ones pass the integer
  # check and converting in place where a lossless conversion is possible
  check_cols = c(".chain", ".iteration", ".draw")
  passed = logical(length(check_cols))
  for (i in seq_along(check_cols)) {
    col = check_cols[[i]]
    col_value = model[[col]]

    if (is.null(col_value)) {
      # column is missing: leave it marked as failed
      next
    }

    if (is.integer(col_value)) {
      passed[[i]] = TRUE
    } else if (is.logical(col_value) || is_integerish(col_value)) {
      # logical columns are accepted so that an all-NA column (which R types
      # as logical by default) becomes NA_integer_
      model[[col]] = as.integer(col_value)
      passed[[i]] = TRUE
    }
  }

  failed_cols = check_cols[!passed]
  if (length(failed_cols) > 0) {
    stop(stop_message,
      " The following columns are not integer-like (or are missing):\n",
      " ", paste0("`", failed_cols, "`", collapse = ", ")
    )
  }

  # one row per draw means no value of `.draw` may repeat
  if (anyDuplicated(model[[".draw"]]) > 0) {
    stop(stop_message,
      " The `.draw` column in the input data frame has more than one row per draw\n",
      " (its values are not unique)."
    )
  }

  model
}

#' @rdname tidy_draws
#' @export
tidy_draws.mcmc.list = function(model) {
Expand Down
11 changes: 11 additions & 0 deletions man/tidy_draws.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

54 changes: 53 additions & 1 deletion tests/testthat/test.tidy_draws.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ test_that("tidy_draws works with rstan", {
# jags --------------------------------------------------------------------
test_that("tidy_draws works with runjags", {
# runjags will still load without JAGS, it just fails later (so skipping on runjags alone will
# not work correctly if runjags is installed but the system does not have JAGS). So we skip if
# not work correctly if runjags is installed but the system does not have JAGS). So we skip if
# rjags does not load as well, as rjags will correctly fail to load if JAGS isn't installed.
skip_if_not_installed("rjags")
skip_if_not_installed("runjags")
Expand Down Expand Up @@ -158,3 +158,55 @@ test_that("tidy_draws works with jagsUI", {

expect_equal(tidy_draws(m), draws_tidy)
})



# existing data frames ----------------------------------------------------
test_that("tidy_draws is idempotent on existing data frames", {
  data(RankCorr)

  # feeding tidy_draws() its own output must hand back exactly the same object
  already_tidy = tidy_draws(RankCorr)
  round_trip = tidy_draws(already_tidy)

  expect_identical(round_trip, already_tidy)
})

test_that("tidy_draws works on existing data frames with numeric columns", {
  data(RankCorr)

  expected = tidy_draws(RankCorr)

  # same draws, but with every index column stored as double instead of integer
  as_double = expected
  for (index_col in c(".chain", ".iteration", ".draw")) {
    as_double[[index_col]] = as.numeric(as_double[[index_col]])
  }

  # double index columns should be converted back to integer
  expect_identical(tidy_draws(as_double), expected)

  # a plain (logical) NA index column should come back as NA_integer_
  as_double$.chain = NA
  as_double$.iteration = NA
  expected$.chain = NA_integer_
  expected$.iteration = NA_integer_

  expect_identical(tidy_draws(as_double), expected)
})

test_that("tidy_draws fails on existing data frames with incorrect column types", {
  data(RankCorr)

  not_integer_pattern = "The following columns are not integer-like.*\\.chain"
  missing_pattern = "The following columns.*are missing.*\\.chain"

  draws = tidy_draws(RankCorr)

  # a character index column cannot be treated as integer
  draws$.chain = "a"
  expect_error(tidy_draws(draws), not_integer_pattern)

  # dropping the index column entirely is reported by the same check
  draws$.chain = NULL
  expect_error(tidy_draws(draws), missing_pattern)
})

# The description used to duplicate the "incorrect column types" test above;
# it now names the behavior actually exercised here (non-unique `.draw`).
test_that("tidy_draws fails on existing data frames with non-unique .draw values", {
  data(RankCorr)

  tidy_rc = tidy_draws(RankCorr)
  # every row gets the same draw index, so there is more than one row per draw
  tidy_rc$.draw = 1

  expect_error(tidy_draws(tidy_rc), "The `\\.draw` column in the input data frame has more than one row per draw")
})

0 comments on commit 14b9f93

Please sign in to comment.