Skip to content

Commit

Permalink
Merge pull request #3 from YunyiShen/master
Browse files Browse the repository at this point in the history
fixed mac install problem
  • Loading branch information
crsl4 authored Mar 2, 2021
2 parents cc9be75 + d498bc9 commit f02dccf
Show file tree
Hide file tree
Showing 21 changed files with 606 additions and 18 deletions.
62 changes: 58 additions & 4 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,73 @@ name: R-CMD-check

jobs:
R-CMD-check:
runs-on: macOS-latest
runs-on: ${{ matrix.config.os }}

name: ${{ matrix.config.os }} (${{ matrix.config.r }})

strategy:
fail-fast: false
matrix:
config:
- {os: windows-latest, r: 'release'}
- {os: macOS-latest, r: 'release'}
- {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
- {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}

env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@v1
with:
r-version: ${{ matrix.config.r }}

- uses: r-lib/actions/setup-pandoc@v1

- name: Query dependencies
run: |
install.packages('remotes')
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}

- name: Cache R packages
if: runner.os != 'Windows'
uses: actions/cache@v2
with:
path: ${{ env.R_LIBS_USER }}
key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-

- name: Install system dependencies
if: runner.os == 'Linux'
run: |
while read -r cmd
do
eval sudo $cmd
done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
- name: Install dependencies
run: |
install.packages(c("remotes", "rcmdcheck"))
remotes::install_deps(dependencies = TRUE)
remotes::install_cran("rcmdcheck")
shell: Rscript {0}

- name: Check
env:
_R_CHECK_CRAN_INCOMING_REMOTE_: false
run: |
options(crayon.enabled = TRUE)
rcmdcheck::rcmdcheck(args = "--no-manual", error_on = "error")
shell: Rscript {0}
rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
shell: Rscript {0}

- name: Upload check results
if: failure()
uses: actions/upload-artifact@main
with:
name: ${{ runner.os }}-r${{ matrix.config.r }}-results
path: check
2 changes: 1 addition & 1 deletion CAR-LASSO.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ LaTeX: pdfLaTeX
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate
PackageRoxygenize: rd,collate,vignette
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import(Rcpp, RcppArmadillo, RcppProgress)
import(coda, Matrix, MASS)
import(igraph, ggraph, ggplot2)
importFrom("stats", "as.formula", "formula", "model.matrix", "na.omit",
"rnorm", "sd", "terms")
useDynLib("CARlasso", .registration = TRUE)
export(CARlasso)
export(plot.carlasso_out)
export(horseshoe)
export(simu_AR1)
S3method("plot", "carlasso_out", plot.carlasso_out)
export(rCARlasso_, rCARAlasso_)
16 changes: 13 additions & 3 deletions R/CAR-LASSO.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
#' @param n_iter Number of sampling iterations (i.e. after burn in) for the Gibbs sampler
#' @param n_burn_in Number of burn in iterations for the Gibbs sampler
#' @param thin_by Final sample was thin by this number
#' @param ns parameter for ARS, maximum number of hulls, only used when link is "log" and "logit"
#' @param m parameter for ARS, initial number of hulls, only used when link is "log" and "logit"
#' @param emax parameter for ARS, tolerence for small values being 0, larger meaning we tolerent smaller values, only used when link is "log" and "logit"
#' @param progress Bool, whether report progress from C++
#' @param verbos Bool, whether show warnings and messages.
#'
Expand Down Expand Up @@ -83,6 +86,7 @@ CARlasso <- function(formula, # a double sided formula needed, e.g. x+y~a+b
lambda_diag = 0,
n_iter = 2000,
n_burn_in = 1000, thin_by = 10,
ns = 1000, m=20, emax=64,
progress = TRUE, verbos = TRUE) {
# some waring messages
err_no_predictor <- "No predictor supplied.\n\n"
Expand Down Expand Up @@ -283,6 +287,7 @@ CARlasso <- function(formula, # a double sided formula needed, e.g. x+y~a+b
n_iter, n_burn_in,
thin_by, r_beta, delta_beta,
r_Omega, delta_Omega, lambda_diag,
ns, m, emax,
progress
)
}
Expand All @@ -291,7 +296,9 @@ CARlasso <- function(formula, # a double sided formula needed, e.g. x+y~a+b
y, design,
n_iter, n_burn_in,
thin_by, r_beta, delta_beta,
r_Omega, delta_Omega, progress
r_Omega, delta_Omega,
ns, m, emax,
progress
)
}
}
Expand All @@ -306,6 +313,7 @@ CARlasso <- function(formula, # a double sided formula needed, e.g. x+y~a+b
n_iter, n_burn_in,
thin_by, r_beta, delta_beta,
r_Omega, delta_Omega, lambda_diag,
ns, m, emax,
progress
)
}
Expand All @@ -314,7 +322,9 @@ CARlasso <- function(formula, # a double sided formula needed, e.g. x+y~a+b
y, design,
n_iter, n_burn_in,
thin_by, r_beta, delta_beta,
r_Omega, delta_Omega, progress
r_Omega, delta_Omega,
ns, m, emax,
progress
)
}
}
Expand All @@ -331,7 +341,7 @@ CARlasso <- function(formula, # a double sided formula needed, e.g. x+y~a+b
settings <- list(formula = formula, link = link, adaptive = adaptive,
r_beta = r_beta , delta_beta = delta_beta , r_Omega = r_Omega,
delta_Omega = delta_Omega, lambda_diag = lambda_diag, n_iter = n_iter,
n_burn_in = n_burn_in, thin_by = thin_by, progress = progress, verbos = verbos)
n_burn_in = n_burn_in, thin_by = thin_by, ns=ns,m=m,emax=emax,progress = progress, verbos = verbos)

nodes <- list(response = colnames(y), predictors = colnames(design))
res <- list(point_est = point_est, nodes = nodes,
Expand Down
49 changes: 49 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,55 @@ stein_loss_cpp <- function(Omega, Omega_hat) {
.Call(`_CARlasso_stein_loss_cpp`, Omega, Omega_hat)
}

#' @title Block Gibbs sampler for adaptive CAR-LASSO
#'
#' @description \strong{This function is for advanced users to build their own sampler use adaptive CARlasso as core.} It will execute one round of Gibbs sampler of adaptive CAR-LASSO model. Be aware that the function is a `void` function implemented in C++, and all updated parameters e.g. Omega will be manipulate directly in memory to save space. Users should manage to do their own work to save the state. Also be aware that R uses shallow copy by default, which means one cannot save the state by simply give it to another object e.g. first `Omega_old <- Omega_curr` then update `Omega_curr`, `Omega_old` will also change. \strong{This function will NOT check dimensions of input.} Below we assume n samples, k responses and p predictors.
#' @param Z_curr the current (latent) normal Z_curr, should be n*k. Will not be changed
#' @param design the design matrix, should be n*p. Will not be changed
#' @param lambda2_beta the current shrinkage parameter of regression coefficients, should be a vector with p*k entries. Will be updated
#' @param tau2_curr the current latent scale parameter in the normal mixture representation of Laplace, for regression coefficients, should be a vector with p*k entries. Will be updated.
#' @param beta_curr the current regression coefficients, should be a matrix sized p*k (p row and k columns). Will be updated.
#' @param lambda_Omega the current shrinkage parameter for Omega, should be a vector with k*(k-1)/2 entries. Will be updated.
#' @param Omega_curr the current Omega matrix, should be a matrix of size k*k. Will be updated.
#' @param mu_curr the current mu, intercept, should be a vector of size k. Will be updated.
#' @param r_beta hyperprior's parameter of shrinkage for regression coefficients, should be a scalar of type 'double' and positive. Will not be updated.
#' @param delta_beta hyperprior's parameter of shrinkage for regression coefficients, should be a scalar of type 'double' and positive. Will not be updated.
#' @param r_Omega hyperprior's parameter of shrinkage for precision Omega, should be a scalar of type 'double' and positive. Will not be updated.
#' @param delta_Omega hyperprior's parameter of shrinkage for rprecision Omega, should be a scalar of type 'double' and positive. Will not be updated.
#' @param lambda_diag shrinkage parameter of the diagonal of Omega, should be a vector of size k, should be non-negative. Will not be updated
#' @param k integer, number of responses
#' @param p integer, number of predictors
#' @param n integer, number of Z_curr points
#' @return Again this is a `void` function and will not return anything. All update happened in memory directly.
rCARAlasso_ <- function(Z_curr, design, lambda2_beta, tau2_curr, beta_curr, lambda_Omega, Omega_curr, mu_curr, r_beta, delta_beta, r_Omega, delta_Omega, lambda_diag, k, p, n) {
invisible(.Call(`_CARlasso_rCARAlasso_`, Z_curr, design, lambda2_beta, tau2_curr, beta_curr, lambda_Omega, Omega_curr, mu_curr, r_beta, delta_beta, r_Omega, delta_Omega, lambda_diag, k, p, n))
}

#' @title Block Gibbs sampler for CAR-LASSO
#'
#' @description \strong{This function is for advanced users to build their own sampler use CARlasso as core.} It will execute one round of Gibbs sampler of CAR-LASSO model. Be aware that the function is a `void` function implemented in C++, and all updated parameters e.g. Omega will be manipulate directly in memory to save space. Users should manage to do their own work to save the state. Also be aware that R uses shallow copy by default, which means one cannot save the state by simply give it to another object e.g. first `Omega_old <- Omega_curr` then update `Omega_curr`, `Omega_old` will also change. \strong{This function will NOT check dimensions of input.} Below we assume n samples, k responses and p predictors.
#'
#' @param Z_curr the current (latent) normal data, should be n*k. Will not be changed
#' @param design the design matrix, should be n*p. Will not be changed
#' @param lambda2_beta the current shrinkage parameter of regression coefficients, should be a scalar of type `double`. Will be updated
#' @param tau2_curr the current latent scale parameter in the normal mixture representation of Laplace, for regression coefficients, should be a vector with p*k entries. Will be updated.
#' @param beta_curr the current regression coefficients, should be a matrix sized p*k (p row and k columns). Will be updated.
#' @param lambda_Omega the current shrinkage parameter for Omega, should be a scalar of tyoe `double`. Will be updated.
#' @param Omega_curr the current Omega matrix, should be a matrix of size k*k. Will be updated.
#' @param mu_curr the current mu, intercept, should be a vector of size k. Will be updated.
#' @param r_beta hyperprior's parameter of shrinkage for regression coefficients, should be a scalar of type 'double' and positive. Will not be updated.
#' @param delta_beta hyperprior's parameter of shrinkage for regression coefficients, should be a scalar of type 'double' and positive. Will not be updated.
#' @param r_Omega hyperprior's parameter of shrinkage for precision Omega, should be a scalar of type 'double' and positive. Will not be updated.
#' @param delta_Omega hyperprior's parameter of shrinkage for rprecision Omega, should be a scalar of type 'double' and positive. Will not be updated.
#' @param k integer, number of responses
#' @param p integer, number of predictors
#' @param n integer, number of data points
#' @return Again this is a `void` function and will not return anything. All update happened in memory directly.
#' @export
rCARlasso_ <- function(Z_curr, design, lambda2_beta, tau2_curr, beta_curr, lambda_Omega, Omega_curr, mu_curr, r_beta, delta_beta, r_Omega, delta_Omega, k, p, n) {
invisible(.Call(`_CARlasso_rCARlasso_`, Z_curr, design, lambda2_beta, tau2_curr, beta_curr, lambda_Omega, Omega_curr, mu_curr, r_beta, delta_beta, r_Omega, delta_Omega, k, p, n))
}

rmultireg <- function(Y, X, Bbar, A, nu, V) {
.Call(`_CARlasso_rmultireg`, Y, X, Bbar, A, nu, V)
}
Expand Down
7 changes: 5 additions & 2 deletions R/graph-learning.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
horseshoe <- function(obj, Bbar=NULL, A = NULL, nu=3, V=NULL, thr = 0.5 ){
y <- obj$data$response
design <- obj$data$design
ns <- obj$settings$ns
m <- obj$settings$m
emax <- obj$settings$emax
if(obj$settings$link == "identity"){
multireg_res <- CAR_multireg(y,design,nrow(obj$MCMC_output$beta),
Bbar, A, nu, V)
Expand All @@ -34,13 +37,13 @@ horseshoe <- function(obj, Bbar=NULL, A = NULL, nu=3, V=NULL, thr = 0.5 ){
if(obj$settings$link == "log"){
multireg_res <- Pois_CAR_multireg(y,design,obj$settings$n_burn_in,obj$settings$n_iter,
obj$settings$thin_by,
Bbar, A, nu, V)
Bbar, A, nu, V, ns, m, emax)
}

if(obj$settings$link == "logit"){
multireg_res <- Multinomial_CAR_multireg(y,design,obj$settings$n_burn_in,obj$settings$n_iter,
obj$settings$thin_by,
Bbar, A, nu, V)
Bbar, A, nu, V, ns, m, emax)
}


Expand Down
37 changes: 37 additions & 0 deletions R/mgp_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#' Gut microbiota in the Irish Elderly
#'
#' This study is based on pyrosequencing of 16S rDNA amplicons from faecal samples collected from 178 elderly Irish citizens and 13 healthy young control subjects. A subset of these samples were also subjected to shotgun sequencing using Illumina HiSeq 2000 2x91bp reads. Antibiotic treatment was an exclusion criterion.
#'
#' @docType data
#'
#' @usage data(mgp154)
#'
#' @format An data.frame with genus and predictors.
#'
#' @keywords datasets
#'
#' @references Claesson, Marcus J., et al. "Gut microbiota composition correlates with diet and health in the elderly." Nature 488.7410 (2012): 178-184.
#'
#' @source \href{https://www.mg-rast.org/mgmain.html?mgpage=project&project=mgp154}{MG-RAST-mgp154}
#'
"mgp154"



#' Hofmockel Soil Aggregate COB KBASE
#'
#' This study is based on pyrosequencing of 16S rDNA amplicons from faecal samples collected from 178 elderly Irish citizens and 13 healthy young control subjects. A subset of these samples were also subjected to shotgun sequencing using Illumina HiSeq 2000 2x91bp reads. Antibiotic treatment was an exclusion criterion.
#'
#' @docType data
#'
#' @usage data(mgp2592)
#'
#' @format An data.frame with genus and predictors.
#'
#' @keywords datasets
#'
#' @references Bach, Elizabeth M., et al. "Greatest soil microbial diversity found in micro-habitats." Soil biology and Biochemistry 118 (2018): 217-226.
#'
#' @source \href{https://www.mg-rast.org/mgmain.html?mgpage=project&project=mgp2592}{MG-RAST-mgp2592}
#'
"mgp2592"
10 changes: 8 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
plot.carlasso_out <- function(x, ...) {
dots <- list(...)
tol <- dots$tol
if(x$settings$link=="logit"){
response_name <- x$nodes$response[-length(x$nodes$response)]
}
else{
response_name <- x$nodes$response
}
if(is.null(tol)) tol = 0.01
col_pn <- c("lightblue","pink")
# graph structure using threshold:
Expand All @@ -28,7 +34,7 @@ plot.carlasso_out <- function(x, ...) {
CAR <- get_CAR_MB(x$point_est$beta*B_binary,
Graph_binary*x$point_est$Omega)

n_resp <- length(x$nodes$response)
n_resp <- length(response_name)
n_pred <- length(x$nodes$predictors)

vertices_df <- data.frame(id = c(paste0("resp", 1:n_resp),
Expand Down Expand Up @@ -75,7 +81,7 @@ plot.carlasso_out <- function(x, ...) {
E(full_graph)$abs_weight <- abs(E(full_graph)$weight)


V(full_graph)$name <- c(x$nodes$response, x$nodes$predictors)
V(full_graph)$name <- c(response_name, x$nodes$predictors)

V(full_graph)$alpha_centrality <- alpha_centrality(full_graph)
V(full_graph)$type <- type[c(rep(2, n_resp), rep(1, n_pred))]
Expand Down
31 changes: 31 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,34 @@ plot(car_res,tol = 0.05)
car_res <- horseshoe(car_res)
plot(car_res)
```

To run a reduced version of the analysis on human gut microbiome (with less predictors and responses), try:

```r
gut_res <- CARlasso(Alistipes+Bacteroides+
Eubacterium+Parabacteroides+all_others~
BMI+Age+Gender+Stratum,
data = mgp154,link = "logit",
adaptive = TRUE, n_iter = 5000,
n_burn_in = 1000, thin_by = 10)
# horseshoe will take a while, as it's currently implemented in R rather than C++
gut_res <- horseshoe(gut_res)
plot(gut_res)
```
It might take a little while due to the sampling process of the latent normal variable

Though we don't recommend treating compositional data as counts, as a illustration, we can run the counting model:

```r
gut_res <- CARlasso(Alistipes+Bacteroides+
Eubacterium+Parabacteroides+all_others~
BMI+Age+Gender+Stratum,
data = mgp154,link = "log",
adaptive = TRUE,
r_beta = 0.1, # default sometimes cause singularity in Poisson model due to exponential transformation, slightly change can fix it.
n_iter = 5000,
n_burn_in = 1000, thin_by = 10)
# horseshoe will take a while, as it's currently implemented in R rather than C++
gut_res <- horseshoe(gut_res)
plot(gut_res)
```
Binary file added data/mgp154.rda
Binary file not shown.
Binary file added data/mgp2592.rda
Binary file not shown.
9 changes: 9 additions & 0 deletions man/CARlasso.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions man/mgp154.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f02dccf

Please sign in to comment.