remove some unused functions, commented code etc from old elbo computations
stephens999 committed Apr 18, 2018
1 parent ef191ed commit 9460e90
Showing 9 changed files with 142 additions and 118 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: susieR
Type: Package
Title: Fit Sum of Single Effects linear regression model
Version: 0.1.3
Version: 0.1.4
Author: Matthew Stephens
Maintainer: <[email protected]>
Description: Fits a sparse regression model with up to $L$ effects, where $L$ is user-specified.
27 changes: 1 addition & 26 deletions R/elbo.R
@@ -1,26 +1,3 @@
# This is for the scaled prior in which the effect prior variance is sa2 * sigma2.
# s is a susie fit: a list with elements alpha, mu, mu2, sigma2, sa2.
elbo_fn = function(X,Y,s){
L = nrow(s$alpha)
n = nrow(X)
p = ncol(X)
d = colSums(X*X) #note this is currently being computed 2 times - here and in Eloglik; could be made more efficient by avoiding this

ss <- s$mu2 - s$mu^2 # posterior variance (conditional on inclusion)
postb2 = s$alpha * s$mu2 # posterior second moment of b
Ell = Eloglik(X,Y,s)

sub = s$alpha>0 # this is to avoid taking 0 * log(0) in next line
KL1 = sum(s$alpha[sub] * log(s$alpha[sub]/(1/p)))

KL2 = -0.5 * rowSums(s$alpha * (1 + log(ss) - log(s$sigma2*s$sa2))) +
  0.5 * rowSums(postb2/(s$sigma2*s$sa2))
KL2[s$sa2==0] = 0 # deal with 0 prior

return(Ell - KL1 - sum(KL2))
}

#' @title Get objective function from data and susie fit object.
#'
#' @param X an n by p matrix of covariates
@@ -33,8 +10,6 @@ susie_get_objective = function(X, Y, s) {
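# The objective (ELBO) is the expected log-likelihood E_q[log p(Y | b, sigma2)]
# minus the sum of per-effect KL divergences; the KL terms are accumulated
# in s$KL by update_each_effect (see R/susie_vbupdate.R below).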
return(Eloglik(X,Y,s)-sum(s$KL))
}



#' @title Expected log-likelihood for a susie fit
Eloglik = function(X,Y,s){
n = nrow(X)
@@ -43,7 +18,7 @@ Eloglik = function(X,Y,s){
return(result)
}


# expected squared residuals
get_ER2 = function(X,Y,s){
Xr = (s$alpha*s$mu) %*% t(X)
Xrsum = colSums(Xr)
4 changes: 0 additions & 4 deletions R/estimate_residual_variance.R
@@ -3,10 +3,6 @@
#' @param Y an n vector of data
#' @param s a susie fit
estimate_residual_variance = function(X,Y, s){
# R = get_R(X,Y,s) #residuals
# d = colSums(X^2)
# post_var = s$alpha*s$mu2 - (s$alpha*s$mu)^2
# V = colSums(post_var)
n = nrow(X)
return( (1/n)* get_ER2(X,Y,s) )
}
76 changes: 1 addition & 75 deletions R/single_effect_regression.R
@@ -44,35 +44,8 @@ single_effect_regression = function(Y,X,sa2=1,s2=1,optimize_sa2=FALSE){
post_mean = (d/s2) * post_var * betahat
post_mean2 = post_var + post_mean^2 # second moment
loglik = maxlbf + log(mean(w)) + sum(dnorm(Y,0,sqrt(s2),log=TRUE))
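# maxlbf + log(mean(w)) is a numerically stable log(mean(exp(lbf))):
# the largest log Bayes factor is factored out before exponentiating.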
#SER_loglik(V,Y,X,s2)
return(list(alpha=alpha,mu=post_mean,mu2 = post_mean2,lbf=lbf,sa2=V/s2, loglik = loglik))
}

# compute loglik for the SER
SER_loglik = function(V,Y,X,s2){
d = colSums(X^2)
betahat = (1/d) * t(X) %*% Y
shat2 = s2/d
n = nrow(X)

lbf = dnorm(betahat,0,sqrt(V+shat2),log=TRUE) - dnorm(betahat,0,sqrt(shat2),log=TRUE)

maxlbf = max(lbf)
w = exp(lbf-maxlbf)
return(maxlbf + log(mean(w)) + sum(dnorm(Y,0,sqrt(s2),log=TRUE)))
}

# very slow brute force version for testing
SER_loglik2 = function(V,Y,X,s2){
n = nrow(X)
p = ncol(X)
ll = rep(0,p)
for(j in 1:p){
ll[j] = mvtnorm::dmvnorm(Y,sigma=V*(X[,j] %*% t(X[,j])) + s2*diag(n),log=TRUE)
}
maxll = max(ll)
w = exp(ll-maxll)# w =BF/BFmax
return(maxll + log(mean(w)))
return(list(alpha=alpha,mu=post_mean,mu2 = post_mean2,lbf=lbf,sa2=V/s2, loglik = loglik))
}


@@ -129,50 +102,3 @@ lbf = function(V,shat2,T2){





#
# em_update_prior_variance_single_regression = function(Y,X,sa2,s2){
# d = colSums(X^2)
# sa2 = s2*sa2 # scale by residual variance
# betahat = (1/d) * t(X) %*% Y
# shat2 = s2/d
#
# lbf = dnorm(betahat,0,sqrt(sa2+shat2),log=TRUE) - dnorm(betahat,0,sqrt(shat2),log=TRUE)
# #log(bf) on each SNP
#
# maxlbf = max(lbf)
# w = exp(lbf-max(lbf)) # w is proportional to BF, but subtract max for numerical stability
# alpha = w/sum(w) # posterior prob on each SNP
#
# t2 = betahat^2/shat2 # t-squared
# # if(min(d)==max(d)){
# # vmax = shat2[1]*(sum(alpha*t2)-1)
# # }
# #
# # this is the derivative of the complete data log-likelihood wrt v = prior variance
# cdll_negloglik = function(v){
# lbf = dnorm(betahat,0,sqrt(v+shat2),log=TRUE) - dnorm(betahat,0,sqrt(shat2),log=TRUE)
# return(-sum(alpha*lbf))
# }
#
# cdll_negloglik.logscale = function(v){
# lbf = dnorm(betahat,0,sqrt(exp(v)+shat2),log=TRUE) - dnorm(betahat,0,sqrt(shat2),log=TRUE)
# return(-sum(alpha*lbf))
# }
#
#
# cdll_negderiv = function(v){-0.5*sum(alpha * (1/(v+shat2)) * ((shat2/(shat2+v))*t2 -1))}
# v_upper = max(shat2*(t2-1)) # upper bound on vhat
#
#
#
#
# if(v_upper>0 && cdll_deriv(0)>0){
# v_init = v_upper/2
# v_opt = optim(log(vinit), cdll_negloglik.logscale, method="BFGS")
# # v_opt = uniroot(cdll_deriv,interval = c(0,v_upper), extendInt = "downX") #$root
# } else{}
# v_opt = 0
# }
# }
6 changes: 0 additions & 6 deletions R/susie.R
@@ -78,10 +78,7 @@ susie = function(X,Y,L=10,prior_variance=1,residual_variance=NULL,max_iter=100,t

# initialize elbo to NA
elbo = rep(NA,max_iter+1)
elbo2 = rep(NA,max_iter+1)

elbo[1] = -Inf;
# elbo2[1] = -Inf;

for(i in 1:max_iter){
#s = add_null_effect(s,0)
@@ -99,13 +96,10 @@
}
#s = remove_null_effects(s)

# elbo2[i+1] = susie_get_objective(X,Y,s) #
elbo[i+1] = susie_get_objective(X,Y,s)
if((elbo[i+1]-elbo[i])<tol) break;
}
elbo = elbo[1:(i+1)] #remove trailing NAs

s$elbo <- elbo
s$elbo2 <- elbo2
return(s)
}
3 changes: 1 addition & 2 deletions R/susie_vbupdate.R
@@ -8,7 +8,7 @@ update_each_effect <- function (X, Y, s_init, estimate_prior_variance=FALSE) {
# Repeat for each effect to update
s = s_init
L = nrow(s$alpha)
# s$Xr = X %*% colSums(s$alpha * s$mu) # should not need - check !!

if(L>0){
for (l in 1:L){
# remove lth effect from fitted values
@@ -26,7 +26,6 @@ update_each_effect <- function (X, Y, s_init, estimate_prior_variance=FALSE) {
s$sa2[l] <- res$sa2
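# KL divergence from the prior for the lth effect, via the identity
# KL(q || prior) = E_q[log p(R | b)] - log p(R): res$loglik is the SER
# marginal log-likelihood and SER_posterior_e_loglik the posterior
# expected log-likelihood of the residuals R.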
s$KL[l] <- -res$loglik + SER_posterior_e_loglik(X,R,s$sigma2,res$alpha*res$mu,res$alpha*res$mu2)

#print(susie_get_objective(X,Y,s))
s$Xr <- s$Xr + X %*% (s$alpha[l,]*s$mu[l,])
}
}
8 changes: 4 additions & 4 deletions vignettes/intro.Rmd
Expand Up @@ -53,7 +53,7 @@ for(i in 1:(n-1)){
beta = c(rep(0,100),rep(1,100),rep(3,100),rep(-2,100),rep(0,600))
y = beta + rnorm(n)
delta[,2:1000] = scale(delta[,2:1000])
s = susie(delta,y,L=10,sigma2=1)
s = susie(delta,y,L=10)
```

Plot results: the truth is green, and the susie estimate is red.
@@ -68,9 +68,9 @@ s$sigma2
Try something harder, where beta increases linearly:
```{r}
set.seed(1)
beta = seq(0,1,length=1000)
beta = seq(0,2,length=1000)
y = beta + rnorm(n)
s = susie(delta,y,sigma2=1,L=10)
s = susie(delta,y,L=10)
plot(y)
lines(predict(s),col=2,lwd=3)
lines(beta,col=3,lwd=3)
@@ -93,7 +93,7 @@ What happens if we have a trend plus a sudden change?
```{r}
beta = beta + c(rep(0,500),rep(2,500))
y = beta + rnorm(n)
s = susie(delta,y,sigma2=1,L=10)
s = susie(delta,y,L=10)
plot(y)
lines(predict(s),col=2,lwd=3)
lines(beta,col=3,lwd=3)
63 changes: 63 additions & 0 deletions vignettes/optimize.Rmd
@@ -0,0 +1,63 @@
---
title: "optimize"
author: "Matthew Stephens"
date: "4/15/2018"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

Diagnose optimization issues with Lei's example:
```{r}
set.seed(777)
devtools::load_all(".")
X <- matrix(rnorm(1010 * 1000), 1010, 1000)
beta <- rep(0, 1000)
beta[1 : 200] <- 100
y <- X %*% beta + rnorm(1010)
s = susie(X,y,L=1,estimate_sa2 = TRUE)
Y = y-s$Xr
s2 = s$sigma2
x = seq(1,100000,length=100)
l = rep(0,100)
lg = rep(0,100)
for(i in 1:100){
l[i] = loglik(x[i],Y,X,s2)
lg[i] = loglik.grad(x[i],Y,X,s2)
}
plot(x,l)
plot(x,lg)
# > which.max(l)
# [1] 23
# > lg[23]
# [1] -2.398905e-07
# > lg[22]
# [1] 6.282734e-07
lx = log(x)
l2=l
lg2=lg
for(i in 1:100){
l2[i] = negloglik.logscale(lx[i],Y,X,s2)
lg2[i] = negloglik.grad.logscale(lx[i],Y,X,s2)
}
plot(lx,l2)
plot(lx,lg2)
lx2 = seq(0,20,length=100) # wider grid on the log scale
l3=l2
lg3=lg2
for(i in 1:100){
l3[i] = negloglik.logscale(lx2[i],Y,X,s2)
lg3[i] = negloglik.grad.logscale(lx2[i],Y,X,s2)
}
plot(lx2,l3)
plot(lx2,lg3)
uniroot(negloglik.grad.logscale,c(-20,20),extendInt = "upX",Y=Y,X=X,s2=s2)
```

So, to summarize, the problem seems to be that optim struggles with the very flat initial gradient near 0.
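
The workaround used above is to work on the log scale: by the chain rule the gradient with respect to log(V) is V times the gradient with respect to V, which stretches out the flat region near zero and gives the root-finder a well-scaled problem. Here is a minimal self-contained sketch of that reparameterization, with a toy objective standing in for the package's negloglik (illustrative only):
```{r}
# Toy stand-in for negloglik (illustrative only): minimum at v = 99.
negll = function(v) 0.5*log(v + 1) + 50/(v + 1)
negll.grad = function(v) 0.5/(v + 1) - 50/(v + 1)^2
# Chain rule: d/dlv negll(exp(lv)) = exp(lv) * negll.grad(exp(lv)),
# so the gradient is rescaled by v itself.
negll.grad.logscale = function(lv) exp(lv) * negll.grad(exp(lv))
# Root-finding on the log scale, as with negloglik.grad.logscale above:
exp(uniroot(negll.grad.logscale, c(-20, 20), extendInt = "upX")$root) # ~99
```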
71 changes: 71 additions & 0 deletions vignettes/testing.Rmd
@@ -0,0 +1,71 @@
---
title: "test.Rmd"
author: "Matthew Stephens"
date: "4/14/2018"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# simulate data

This is Lei's example:
```{r}
set.seed(777)
library(susieR)
X <- matrix(rnorm(1010 * 1000), 1010, 1000)
beta <- rep(0, 1000)
beta[1 : 200] <- 100
y <- X %*% beta + rnorm(1010)
s = susie(X,y,L=200)
plot(coef(s),beta)
s$sigma2
# fit <- lm(y ~ X - 1)
# mlr.p <- log(summary(fit)$coefficients[, 4])
#
mar.p <- c()
mar.betahat = c()
for (i in 1 : 1000) {
fit <- lm(y ~ X[, i] - 1)
mar.p[i] <- log(summary(fit)$coefficients[, 4])
mar.betahat[i] <- summary(fit)$coefficients[, 1]
}
#
# pdf("pvalue.pdf", width = 10, height = 5)
# par(mfrow = c(1, 2))
# plot(mlr.p, ylab = "log(p-value)", main = "Multiple Linear Regression")
# abline(h = log(0.05 / 1000), lty = 2, col = "red")
# legend("right", lty = 2, col = "red", "log(0.05/p)")
#
# plot(mar.p, ylab = "log(p-value)", main = "One-on-One Linear Regression")
# abline(h = log(0.05 / 1000), lty = 2, col = "red")
```

Notice that the coefficients are monotonic in betahat. Some shrinkage of the zero effects is evident, but not enough, presumably because sigma2 is way over-estimated. We also see excessive shrinkage of the true signals, presumably because sa2 is too small.
```{r}
plot(coef(s),mar.betahat)
```
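
For intuition about the sa2 point, here is a minimal sketch (illustrative, not package code) based on the single-effect update shown above, post_mean = (d/s2) * post_var * betahat, assuming the usual posterior variance post_var = 1/(1/(sa2*s2) + d/s2) under the scaled prior V = sa2*s2. The factor multiplying betahat then simplifies to d/(d + 1/sa2), so a too-small sa2 forces strong shrinkage regardless of s2:
```{r}
# Shrinkage factor of a single-effect posterior mean under the scaled prior:
# (d/s2) * post_var = d / (d + 1/sa2), assuming post_var = 1/(1/(sa2*s2) + d/s2).
shrinkage_factor = function(d, sa2) d / (d + 1/sa2)
shrinkage_factor(d = 1010, sa2 = 1) # ~0.999: almost no shrinkage
shrinkage_factor(d = 1010, sa2 = 1e-4) # ~0.09: strong shrinkage
```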


Here we try fixing sigma2 to its true value.
```{r}
s2 = susie(X,y,L=300,sigma2=1,estimate_sigma2=FALSE)
plot(coef(s2),beta)
```
It works!

```{r}
plot(s$alpha[1,])
plot(s$alpha[2,])
```

Try with a larger prior on effect sizes:
```{r}
s3 = susie(X,y,L=200, sa2 = 100)
```

