Fixes to code and roxygen2 docs in susie_trendfilter.R.

nvrivera · Jul 9, 2020 · e36e68e · e36e68e
1 parent ba206c2
commit e36e68e
Show file tree

Hide file tree

Showing 14 changed files with 186 additions and 165 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -15,7 +15,7 @@ Description: Implements methods for variable selection in linear
     and fast, allowing the SuSiE model be fit to large data sets
     (thousands of samples and hundreds of thousands of variables).
 Date: 2020-07-09
-Version: 0.9.21
+Version: 0.9.22
 Authors@R: c(person("Gao","Wang",role="aut",email="[email protected]"),
              person("Yuxin","Zou",role="aut"),
              person("Kaiqian","Zhang",role="aut"),

diff --git a/NAMESPACE b/NAMESPACE
@@ -32,6 +32,7 @@ importFrom(Matrix,colSums)
 importFrom(Matrix,crossprod)
 importFrom(Matrix,diag)
 importFrom(Matrix,rowSums)
+importFrom(Matrix,sparseMatrix)
 importFrom(Matrix,t)
 importFrom(Matrix,tcrossprod)
 importFrom(ggplot2,aes_string)

diff --git a/R/predict.susie.R b/R/predict.susie.R
@@ -15,7 +15,7 @@
 #' @export
 #' 
 coef.susie = function (object, ...) {
-  s <- object
+  s = object
   return(c(s$intercept,colSums(s$alpha*s$mu)/s$X_column_scale_factors))
 }
 
@@ -41,8 +41,8 @@ coef.susie = function (object, ...) {
 #' 
 predict.susie = function (object, newx = NULL,
                           type = c("response","coefficients"), ...) {
-  s <- object
-  type <- match.arg(type)
+  s = object
+  type = match.arg(type)
   if (type == "coefficients") {
     if (!missing(newx))
       stop("Do not supply newx when predicting coefficients")

diff --git a/R/set_X_attributes.R b/R/set_X_attributes.R
@@ -17,18 +17,18 @@ set_X_attributes = function(X, center = TRUE, scale = TRUE) {
 
   # if X is a trend filtering matrix
   if (!is.null(attr(X,"matrix.type"))) {
-    order <- attr(X,"order")
-    n <- ncol(X)
+    order = attr(X,"order")
+    n = ncol(X)
 
     # Set three attributes for X.
-    attr(X, "scaled:center") <- compute_tf_cm(order, n)
-    attr(X, "scaled:scale") <- compute_tf_csd(order, n)
-    attr(X, "d") <- compute_tf_d(order,n,attr(X,"scaled:center"),
+    attr(X, "scaled:center") = compute_tf_cm(order, n)
+    attr(X, "scaled:scale") = compute_tf_csd(order, n)
+    attr(X, "d") = compute_tf_d(order,n,attr(X,"scaled:center"),
                                  attr(X,"scaled:scale"),scale,center)
     if (!center)
-      attr(X, "scaled:center") <- rep(0, n)
+      attr(X, "scaled:center") = rep(0, n)
     if (!scale)
-      attr(X, "scaled:scale") <- rep(1, n)
+      attr(X, "scaled:scale") = rep(1, n)
   } else {
 
     # If X is either a dense or sparse ordinary matrix.
@@ -47,9 +47,9 @@ set_X_attributes = function(X, center = TRUE, scale = TRUE) {
     X.std = (t(X) - cm)/csd
 
     # Set three attributes for X.
-    attr(X,"d") <- rowSums(X.std * X.std)
-    attr(X,"scaled:center") <- cm
-    attr(X,"scaled:scale") <- csd
+    attr(X,"d") = rowSums(X.std * X.std)
+    attr(X,"scaled:center") = cm
+    attr(X,"scaled:scale") = csd
   }
   return(X)
 }

diff --git a/R/sparse_multiplication.R b/R/sparse_multiplication.R
@@ -14,14 +14,14 @@ compute_Xb = function (X, b) {
   if (!is.null(attr(X,"matrix.type")))
 
     # When X is a trend filtering matrix.
-    scaled.Xb <- compute_tf_Xb(attr(X,"order"),b/csd)
+    scaled.Xb = compute_tf_Xb(attr(X,"order"),b/csd)
   else
 
     # When X is an ordinary sparse/dense matrix.
-    scaled.Xb <- tcrossprod(X,t(b/csd))
+    scaled.Xb = tcrossprod(X,t(b/csd))
 
   # Center Xb.
-  Xb <- scaled.Xb - sum(cm*b/csd)
+  Xb = scaled.Xb - sum(cm*b/csd)
   return(as.numeric(Xb))
 }
 
@@ -36,20 +36,20 @@ compute_Xb = function (X, b) {
 compute_Xty = function (X, y) {
   cm = attr(X,"scaled:center")
   csd = attr(X,"scaled:scale")
-  ytX <- crossprod(y,X)
+  ytX = crossprod(y,X)
 
   # Scale Xty.
   if (!is.null(attr(X,"matrix.type")))
 
     # When X is a trend filtering matrix.
-    scaled.Xty <- compute_tf_Xty(attr(X,"order"),y)/csd
+    scaled.Xty = compute_tf_Xty(attr(X,"order"),y)/csd
   else
 
     # When X is an ordinary sparse/dense matrix.
-    scaled.Xty <- t(ytX/csd)
+    scaled.Xty = t(ytX/csd)
 
   # Center Xty.
-  centered.scaled.Xty <- scaled.Xty - cm/csd * sum(y)
+  centered.scaled.Xty = scaled.Xty - cm/csd * sum(y)
   return(as.numeric(centered.scaled.Xty))
 }
 

diff --git a/R/susie_plot_changepoint.R b/R/susie_plot_changepoint.R
@@ -25,8 +25,7 @@
 #' y = mu + rnorm(500)
 #' s = susie_trendfilter(y)
 #'
-#' # Produces ggplot with credible sets for changepoints on top of
-#' # plot.
+#' # Produces ggplot with credible sets for changepoints.
 #' susie_plot_changepoint(s,y) 
 #'
 #' @importFrom ggplot2 ggplot

diff --git a/R/susie_trendfilter.R b/R/susie_trendfilter.R
@@ -1,39 +1,54 @@
-#susie_trendfilter
-
-#' @title Applies susie to perform trend filtering (especially
+#' @title Apply susie to trend filtering (especially
 #'   changepoint problems), a type of non-parametric regression.
 #'
-#' @details Fits the non-parametric Gaussian regression model $y=mu
-#' +e$, where the mean $mu$ is modelled as $mu=Xb$ where $X$ is a
-#' matrix with columns containing an appropriate basis and b is vector
-#' with a (sparse) SuSiE prior. In particular, when order=0, the $j$th
-#' column of $X$ is a vector with first $j$ elements equal to 0 and
-#' remaining elements equal to 1, so b_j corresponds to the change in
-#' the mean of y between indices $j$ and $j+1$.  For background on
-#' trend filtering see [Adaptive piecewise polynomial estimation via
-#' trend
-#' filtering](https://projecteuclid.org/euclid.aos/1395234979)(Ryan
-#' J. Tibshirani, 2014) The implementation here exploits the special
-#' structure of $X$ whiuch means that the matrix-vector product $X'y$
-#' is fast to compute in O(n) computation rather than $O(n^2)$ if $X$
-#' were formed explicitly. For implementation details, view the
-#' "trendfiltering derivations" vignette by running
-#' \code{vignette("trendfiltering_derivations")}.
+#' @description Fits the non-parametric Gaussian regression model
+#'   \eqn{y = mu + e}, where the mean \eqn{mu} is modelled as \eqn{mu =
+#'   Xb}, X is a matrix with columns containing an appropriate basis,
+#'   and b is vector with a (sparse) SuSiE prior. In particular, when
+#'   \code{order = 0}, the jth column of X is a vector with the first j
+#'   elements equal to zero, and the remaining elements equal to 1, so
+#'   that \eqn{b_j} corresponds to the change in the mean of y between
+#'   indices j and j+1. For background on trend filtering, see
+#'   Tibshirani (2014).
+#'
+#' @details The implementation exploits the special structure of X,
+#'   which means that the matrix-vector product \eqn{X^Ty} is fast to
+#'   compute in \eqn{O(n)} computation rather than \eqn{O(n^2)} if X
+#'   were formed explicitly. For implementation details, view the "Trend
+#'   filtering" vignette by running
+#'   \code{vignette("trendfiltering_derivations")}.
+#'
+#' @param y An n-vector of observations ordered in time or space
+#'   (assumed to be equally spaced).
 #'
-#' @param y an n vector of observations that are ordered in time or
-#'   space (assumed equally-spaced)
+#' @param order An integer specifying the order of trend filtering.
+#'   The default, \code{order = 0}, corresponds to "changepoint"
+#'   problems (\emph{i.e.}, piecewise constant \eqn{mu}). Although
+#'   \code{order > 0} is implemented, we do not recommend its use; in
+#'   practice, we have found problems with convergence of the algorithm
+#'   to poor local optima, producing unreliable inferences.
+#' 
+#' @param standardize Logical indicating whether to standardize the X
+#'   variables ("basis functions"); defaults to \code{standardize = FALSE}
+#'   as these basis functions already have a natural scale.
+#' 
+#' @param use_mad Logical indicating whether to use the "median
+#'   absolute deviation" (MAD) method to the estimate residual
+#'   variance. If \code{use_mad = TRUE}, susie is run twice, first by
+#'   fixing the residual variance to the MAD value, then a second time,
+#'   initialized to the first fit, but with residual variance estimated
+#'   the usual way (maximizing the ELBO). We have found this strategy
+#'   typically improves reliability of the results by reducing a
+#'   tendency to converge to poor local optima of the ELBO.
+#' 
+#' @param ... Other arguments passed to \code{\link{susie}}.
+#' 
+#' @return A "susie" fit; see \code{\link{susie}} for details.
 #'
-#' @param order an integer specifying the order of trend filtering. Default order=0, which corresponds
-#' to "changepoint" problems (i.e. piecewise constant mu). Although order > 0 is implemented, we do not recommend using it since we
-#' find there are often problems with convergence of the algorithm to poor local optima, producing unreliable inferences.
-#' @param standardize boolean indicating whether to standardize X variables (basis functions); defaults to FALSE as these basis functions already have a natural scale
-#' @param use_mad boolean indicating whether to use ``median absolute deviation" (MAD) method to estimate residual variance.
-#' If TRUE then susie is run twice,
-#' first by fixing the residual variance to the MAD value and then a second time, initializing from the first fit,
-#' but with residual variance estimated the usual way (maximizing the ELBO). We have found this strategy
-#' typically improves reliability of results by reducing a tendency to converge to poor local optima of the ELBO.
-#' @param ... other parameters to pass to susie
-#' @return a "susie" object; see ?susie for details
+#' @references R. J. Tibshirani (2014). Adaptive piecewise polynomial
+#' estimation via trend filtering. \emph{Annals of Statistics}
+#' \bold{42}, 285-323. \url{https://doi.org/10.1214/13-AOS1189}
+#' 
 #' @examples
 #' set.seed(1)
 #' mu = c(rep(0,50),rep(1,50),rep(3,50),rep(-2,50),rep(0,300))
@@ -42,31 +57,38 @@
 #' plot(y)
 #' lines(mu,col=1,lwd=3)
 #' lines(predict(s),col=2,lwd=2)
-#' susie_get_cs(s) # returns credible sets (for indices of y that occur just before changepoints)
-#' susie_plot_changepoint(s,y) # produces ggplot with credible sets for changepoints on top of plot
+#'
+#' # Returns credible sets (indices of y that occur just before
+#' # changepoints).
+#' susie_get_cs(s)
+#'
+#' # Produces plot with credible sets for changepoints.
+#' susie_plot_changepoint(s,y) 
+#'
+#' @importFrom Matrix sparseMatrix
+#' 
 #' @export
-susie_trendfilter = function(y, order=0, standardize=FALSE, use_mad=TRUE, ...){
-  if (order > 0){
-    warning("order>0 is not recommended (see ?susie_trendfilter for more explanation).")
-  }
+#' 
+susie_trendfilter = function (y, order = 0, standardize = FALSE,
+                              use_mad = TRUE, ...) {
+  if (order > 0)
+    warning("order > 0 is not recommended")
   n = length(y)
-  X <- Matrix::sparseMatrix(i=NULL,j=NULL,dims=c(n,n))
-  attr(X, "matrix.type") = "tfmatrix"
-  attr(X, "order") = order
+  X = sparseMatrix(i = NULL,j = NULL,dims = c(n,n))
+  attr(X,"matrix.type") = "tfmatrix"
+  attr(X,"order") = order
   if (use_mad && !("s_init" %in% names(list(...)))) {
     mad = estimate_mad_residual_variance(y)
-    s_mad_init = suppressWarnings(susie(X=X, Y=y, standardize = standardize, estimate_residual_variance = FALSE, residual_variance = mad, ...))
-    s = susie(X=X, Y=y, standardize=standardize, s_init=s_mad_init, ...)
-  } else {
-    s = susie(X=X, Y=y, standardize=standardize, ...)
-  }
+    s_mad_init = suppressWarnings(susie(X = X,Y = y,standardize = standardize,
+      estimate_residual_variance = FALSE,residual_variance = mad,...))
+    s = susie(X = X,Y = y,standardize = standardize, s_init = s_mad_init,...)
+  } else
+    s = susie(X = X,Y = y,standardize = standardize,...)
   return(s)
 }
 
-#' @title estimate residual variance using MAD estimator
-#' @param y an n vector
-#' @return a scalar of estimated residual variance
-#' @keywords internal
-estimate_mad_residual_variance = function(y){
-  return(0.5*(median(abs(diff(y))/0.6745)^2))
-}
+# @title estimate residual variance using MAD estimator
+# @param y an n vector
+# @return a scalar of estimated residual variance
+estimate_mad_residual_variance = function (y)
+  0.5*(median(abs(diff(y))/0.6745)^2)
diff --git a/R/update_each_effect.R b/R/update_each_effect.R
@@ -6,9 +6,9 @@
 #   estimate prior variance
 # @param check_null_threshold float a threshold on the log scale to
 #   compare likelihood between current estimate and zero the null
-update_each_effect <- function (X, Y, s, estimate_prior_variance = FALSE,
-                                estimate_prior_method = "optim",
-                                check_null_threshold) {
+update_each_effect = function (X, Y, s, estimate_prior_variance = FALSE,
+                               estimate_prior_method = "optim",
+                               check_null_threshold) {
   if (!estimate_prior_variance)
     estimate_prior_method = "none"
 
@@ -23,21 +23,21 @@ update_each_effect <- function (X, Y, s, estimate_prior_variance = FALSE,
       # Compute residuals.
       R = Y - s$Xr
 
-      res <- single_effect_regression(R,X,s$V[l],s$sigma2,s$pi,
-                                      estimate_prior_method,
-                                      check_null_threshold)
+      res = single_effect_regression(R,X,s$V[l],s$sigma2,s$pi,
+                                     estimate_prior_method,
+                                     check_null_threshold)
 
       # Update the variational estimate of the posterior mean.
-      s$mu[l,]    <- res$mu
-      s$alpha[l,] <- res$alpha
-      s$mu2[l,]   <- res$mu2
-      s$V[l]      <- res$V
-      s$lbf[l]    <- res$lbf_model
-      s$KL[l]     <- -res$loglik +
+      s$mu[l,]    = res$mu
+      s$alpha[l,] = res$alpha
+      s$mu2[l,]   = res$mu2
+      s$V[l]      = res$V
+      s$lbf[l]    = res$lbf_model
+      s$KL[l]     = -res$loglik +
         SER_posterior_e_loglik(X,R,s$sigma2,res$alpha * res$mu,
                                res$alpha * res$mu2)
 
-      s$Xr <- s$Xr + compute_Xb(X,s$alpha[l,] * s$mu[l,])
+      s$Xr = s$Xr + compute_Xb(X,s$alpha[l,] * s$mu[l,])
     }
   return(s)
 }

diff --git a/R/update_each_effect_rss.R b/R/update_each_effect_rss.R
@@ -9,10 +9,10 @@
 #   compare likelihood between current estimate and zero the null
 # 
 #' @importFrom Matrix diag
-update_each_effect_rss <- function (R, z, s_init, Sigma,
-                                    estimate_prior_variance = FALSE,
-                                    estimate_prior_method = "optim",
-                                    check_null_threshold = 0) {
+update_each_effect_rss = function (R, z, s_init, Sigma,
+                                   estimate_prior_variance = FALSE,
+                                   estimate_prior_method = "optim",
+                                   check_null_threshold = 0) {
 
   if (!estimate_prior_variance)
     estimate_prior_method = "none"
@@ -32,15 +32,15 @@ update_each_effect_rss <- function (R, z, s_init, Sigma,
               estimate_prior_method,check_null_threshold)
 
       # Update the variational estimate of the posterior mean.
-      s$mu[l,]    <- res$mu
-      s$alpha[l,] <- res$alpha
-      s$mu2[l,]   <- res$mu2
-      s$V[l]      <- res$V
-      s$lbf[l]    <- res$lbf_model
-      s$KL[l]     <- -res$lbf_model +
+      s$mu[l,]    = res$mu
+      s$alpha[l,] = res$alpha
+      s$mu2[l,]   = res$mu2
+      s$V[l]      = res$V
+      s$lbf[l]    = res$lbf_model
+      s$KL[l]     = -res$lbf_model +
         SER_posterior_e_loglik_rss(R,Sigma,r,res$alpha * res$mu,
                                    res$alpha * res$mu2)
-      s$Rz <- s$Rz + R %*% (s$alpha[l,] * s$mu[l,])
+      s$Rz = s$Rz + R %*% (s$alpha[l,] * s$mu[l,])
     }
   s$Rz = unname(as.matrix(s$Rz))
   return(s)

diff --git a/R/update_each_effect_ss.R b/R/update_each_effect_ss.R
@@ -10,10 +10,10 @@
 #   compare likelihood between current estimate and zero the null
 # 
 #' @importFrom Matrix diag
-update_each_effect_ss <- function (XtX, Xty, s_init,
-                                   estimate_prior_variance = FALSE,
-                                   estimate_prior_method = "optim",
-                                   check_null_threshold = 0) {
+update_each_effect_ss = function (XtX, Xty, s_init,
+                                  estimate_prior_variance = FALSE,
+                                  estimate_prior_method = "optim",
+                                  check_null_threshold = 0) {
   if (!estimate_prior_variance)
     estimate_prior_method = "none"
 
@@ -32,16 +32,16 @@ update_each_effect_ss <- function (XtX, Xty, s_init,
               s$sigma2,s$pi,estimate_prior_method,check_null_threshold)
 
       # Update the variational estimate of the posterior mean.
-      s$mu[l,]    <- res$mu
-      s$alpha[l,] <- res$alpha
-      s$mu2[l,]   <- res$mu2
-      s$V[l]      <- res$V
-      s$lbf[l]    <- res$lbf_model
-      s$KL[l]     <- -res$lbf_model +
+      s$mu[l,]    = res$mu
+      s$alpha[l,] = res$alpha
+      s$mu2[l,]   = res$mu2
+      s$V[l]      = res$V
+      s$lbf[l]    = res$lbf_model
+      s$KL[l]     = -res$lbf_model +
         SER_posterior_e_loglik_ss(attr(XtX,"d"),XtR,s$sigma2,
                                   res$alpha * res$mu,res$alpha * res$mu2)
 
-      s$XtXr <- s$XtXr + XtX %*% (s$alpha[l,] * s$mu[l,])
+      s$XtXr = s$XtXr + XtX %*% (s$alpha[l,] * s$mu[l,])
     }
   }
   s$XtXr = unname(as.matrix(s$XtXr))