misc

jrnold · Apr 22, 2017 · d6de20e · d6de20e
1 parent be96865
commit d6de20e
Show file tree

Hide file tree

Showing 8 changed files with 393 additions and 76 deletions.
diff --git a/Untitled.Rmd b/Untitled.Rmd
@@ -0,0 +1,56 @@
+# Heteroskedasticity and Robust Regression
+
+## Linear Regression with Student t distributed errors
+
+```{r}
+# Load the unionization data. `labor_force_size` is written with comma
+# thousands separators in the file (e.g. "3,931"), hence `col_number()`
+# rather than `col_double()` for that column.
+unionization <- read_tsv(
+  file = "data/western1995/unionization.tsv",
+  col_types = cols(
+    country = col_character(),
+    union_density = col_double(),
+    left_government = col_double(),
+    labor_force_size = col_number(),
+    econ_conc = col_double()
+  )
+)
+
+# Response vector, design matrix and dimensions for the regression of
+# union density on the three covariates.
+data <- preprocess_lm(
+  formula. = union_density ~ left_government + labor_force_size + econ_conc,
+  data = unionization
+)
+
+
+
+```
+
+## Heteroskedasticity
+
+In applied regression, heteroskedasticity consistent (HC) or robust standard errors are often used.
+
+However, there is no straightforward, direct translation of HC standard errors to a regression model in a Bayesian setting. The sandwich method of estimating HC errors uses the same point estimates for the regression coefficients as OLS, but estimates the standard errors of those coefficients in a second stage from the OLS residuals.
+Disregarding differences in frequentist vs. Bayesian inference, it is clear that a direct translation of that method could not be fully Bayesian since the coefficients and errors are not estimated jointly.
+
+Let's derive a linear normal model, but with heteroskedasticity:
+$$
+\begin{aligned}[t]
+y_i &\sim N(X \beta, \sigma_i)
+\end{aligned}
+$$
+This model has a separate scale parameter, $\sigma_i$, for each observation.
+It should be clear that without proper priors this model is not identified (the posterior distribution is improper).
+
+But it should also be clear how we can proceed with heteroskedasticity---by modeling $\sigma_i$.
+The model of $\sigma_i$ should encode any information we have about the heteroskedasticity.
+This can be priors on how different the $\sigma_i$ are, or even covariates for how we think observations may have different values.
+
+It is useful to rewrite the scale parameter of the above equation as a combination of a global scale parameter ($\sigma$) and local scale parameters, $\lambda_i$.
+$$
+y_i \sim \dnorm(X\beta, \lambda_i \sigma)
+$$
+If the squared local scale parameter, $\lambda_i^2$, is distributed inverse-gamma with shape and scale both equal to $\nu / 2$, then the model above is equivalent to
+$$
+y_i \sim \dt(\nu, X \beta, \sigma)
+$$
+
+
+
+
+## Notes
+
+See @GelmanHill2008a [sec 6.6], @BDA2013 [ch 17]
diff --git a/_common.R b/_common.R
@@ -50,3 +50,22 @@ standoc <- function(x = NULL) {
     paste("[", x, "](", STAN_MAN_URL, ")")
   }
 }
+
+# Build the ingredients of a linear model without fitting it.
+#
+# Arguments:
+#   formula.   Model formula (response ~ predictors).
+#   data       Optional data frame (or list/environment) holding the variables.
+#   weights    Optional observation weights. As in `lm()`, evaluated in `data`
+#              and then in the environment of `formula.`.
+#   contrasts  Optional contrasts list, passed to `model.matrix()` as
+#              `contrasts.arg`.
+#   na.action  How to treat missing values; forwarded to `model.frame()` only
+#              when supplied, otherwise `model.frame()` applies its own default.
+#   offset     Optional offset, evaluated like `weights`.
+#   ...        Only `subset` is used (forwarded to `model.frame()`); any other
+#              arguments are ignored.
+#
+# Returns a list with elements:
+#   y        numeric response vector
+#   w        weights (NULL if none were given)
+#   offset   offset (NULL if none was given)
+#   X        design matrix
+#   terms    terms object of the model frame
+#   xlevels  factor levels used to build X
+#   n, k     number of rows and columns of X
+preprocess_lm <- function(formula., data = NULL, weights = NULL,
+                          contrasts = NULL,
+                          na.action = getOption("na.action"),
+                          offset = NULL, ...) {
+  # Rebuild our own call as a call to stats::model.frame and evaluate it in
+  # the caller's frame. Forwarding `weights = weights` / `offset = offset`
+  # through lm() does not work: model.frame() would look up the *symbols*
+  # `weights` and `offset` in `data` and the formula's environment and find
+  # the functions stats::weights / stats::offset instead of the values.
+  cl <- match.call(expand.dots = TRUE)
+  keep <- match(
+    c("formula.", "data", "subset", "weights", "na.action", "offset"),
+    names(cl),
+    0L
+  )
+  cl <- cl[c(1L, keep)]
+  names(cl)[names(cl) == "formula."] <- "formula"
+  cl$drop.unused.levels <- TRUE
+  cl[[1L]] <- quote(stats::model.frame)
+  mf <- eval(cl, parent.frame())
+
+  mt <- attr(mf, "terms")
+  out <- list(
+    y = model.response(mf, "numeric"),
+    w = as.vector(model.weights(mf)),
+    offset = as.vector(model.offset(mf)),
+    X = model.matrix(mt, mf, contrasts.arg = contrasts),
+    terms = mt,
+    xlevels = stats::.getXlevels(mt, mf)
+  )
+  out$n <- nrow(out$X)
+  out$k <- ncol(out$X)
+  out
+}
diff --git a/bayes.bib b/bayes.bib
@@ -1585,45 +1585,7 @@ @Book{MarinRobert2007a
   timestamp = {2017-04-06},
 }
 
-@Article{,
-  author       = {Carpenter, Bob},
-  title        = {Typical Sets and the Curse of Dimensionality},
-  journaltitle = {Stan Case Studies},
-  date         = {2017},
-  timestamp    = {2017-04-15},
-}
-
-@Article{,
-  author       = {Carpenter, Bob and Gabry, Jonah and Goodrich, Ben},
-  title        = {Hierarchical Partial Pooling for Repeated Binary Trials},
-  journaltitle = {Stan Case Studies},
-  date         = {2017-01-19},
-  url          = {http://mc-stan.org/documentation/case-studies/pool-binary-trials-rstanarm.html},
-  urldate      = {2017-04-15},
-  timestamp    = {2017-04-15},
-}
-
-@Article{Betancourt2017a,
-  author       = {Betancourt, Michael},
-  title        = {How the Shape of a Weakly Informative Prior Affects Inferences},
-  journaltitle = {Stan Case Studies},
-  date         = {2017-01},
-  url          = {http://mc-stan.org/documentation/case-studies/weakly_informative_shapes.html},
-  urldate      = {2017-04-15},
-  timestamp    = {2017-04-15},
-}
-
-@Article{,
-  author       = {Diagnosing Biased Inference with Divergences},
-  title        = {Michael Betancourt},
-  journaltitle = {Stan Case Studies},
-  date         = {2017-01},
-  url          = {http://mc-stan.org/documentation/case-studies/divergences_and_bias.html},
-  urldate      = {2017-04-15},
-  timestamp    = {2017-04-15},
-}
-
-@Article{,
+@Article{Betancourt2017b,
   author       = {Betancourt, Michael},
   title        = {Identifying Bayesian Mixture Models},
   journaltitle = {Stan Case Studies},
@@ -1632,25 +1594,6 @@ @Article{
   timestamp    = {2017-04-15},
 }
 
-@Article{,
-  title        = {A primer on {Bayesian} multilevel modeling using {PyStan}},
-  journaltitle = {Stan Case Studies},
-  date         = {2016},
-  url          = {http://mc-stan.org/documentation/case-studies/radon.html},
-  urldate      = {2017-04-15},
-  timestamp    = {2017-04-15},
-}
-
-@Article{,
-  author       = {Carpenter, Bob},
-  title        = {The Impact of Reparameterization on Point Estimates},
-  journaltitle = {Stan Case Studies},
-  date         = {2016-04-25},
-  url          = {http://mc-stan.org/documentation/case-studies/mle-params.html},
-  urldate      = {2017-04-15},
-  timestamp    = {2017-04-15},
-}
-
 @Article{10.2307/24306036,
   author    = {Andrew Gelman and Xiao-Li Meng and Hal Stern},
   title     = {Posterior predictive assessment of model fitness via realized discrepancies},
@@ -1666,6 +1609,19 @@ @Article{10.2307/24306036
   timestamp = {2017-04-20},
 }
 
+@Article{VehtariGelmanGabry2015b,
+  author      = {Aki Vehtari and Andrew Gelman and Jonah Gabry},
+  title       = {Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC},
+  date        = {2015-07-16},
+  doi         = {10.1007/s11222-016-9696-4},
+  eprint      = {1507.04544v5},
+  eprintclass = {stat.CO},
+  eprinttype  = {arXiv},
+  abstract    = {Leave-one-out cross-validation (LOO) and the widely applicable information criterion (WAIC) are methods for estimating pointwise out-of-sample prediction accuracy from a fitted Bayesian model using the log-likelihood evaluated at the posterior simulations of the parameter values. LOO and WAIC have various advantages over simpler estimates of predictive error such as AIC and DIC but are less used in practice because they involve additional computational steps. Here we lay out fast and stable computations for LOO and WAIC that can be performed using existing simulation draws. We introduce an efficient computation of LOO using Pareto-smoothed importance sampling (PSIS), a new procedure for regularizing importance weights. Although WAIC is asymptotically equal to LOO, we demonstrate that PSIS-LOO is more robust in the finite case with weak priors or influential observations. As a byproduct of our calculations, we also obtain approximate standard errors for estimated predictive errors and for comparing of predictive errors between two models. We implement the computations in an R package called 'loo' and demonstrate using models fit with the Bayesian inference package Stan.},
+  file        = {online:http\://arxiv.org/pdf/1507.04544v5:PDF},
+  keywords    = {stat.CO, stat.ME},
+}
+
 @Article{Hallnd,
   author    = {{Statisticat, LLC}},
   title     = {Bayesian Inference},
@@ -1674,6 +1630,16 @@ @Article{Hallnd
   timestamp = {2017-04-20},
 }
 
+@Article{Gelman2014a,
+  author    = {Andrew Gelman},
+  title     = {Discussion with Sander Greenland on posterior predictive checks},
+  journal   = {Statistical Modeling, Causal Inference, and Social Science},
+  date      = {2014-08-11},
+  url       = {http://andrewgelman.com/2014/08/11/discussion-sander-greenland-posterior-predictive-checks/},
+  urldate   = {2017-04-20},
+  timestamp = {2017-04-20},
+}
+
 @WWW{Gelman2009a,
   author    = {Andrew Gelman},
   title     = {Confusions about posterior predictive checks},
@@ -1684,27 +1650,80 @@ @WWW{Gelman2009a
   timestamp = {2017-04-20},
 }
 
-@Article{Gelman2014a,
-  author    = {Andrew Gelman},
-  title     = {Discussion with Sander Greenland on posterior predictive checks},
-  journal   = {Statistical Modeling, Causal Inference, and Social Science},
-  date      = {2014-08-11},
-  url       = {http://andrewgelman.com/2014/08/11/discussion-sander-greenland-posterior-predictive-checks/},
+@Article{Divergences2017a,
+  author       = {Betancourt, Michael},
+  title        = {Diagnosing Biased Inference with Divergences},
+  journaltitle = {Stan Case Studies},
+  date         = {2017-01},
+  url          = {http://mc-stan.org/documentation/case-studies/divergences_and_bias.html},
+  urldate      = {2017-04-15},
+  timestamp    = {2017-04-15},
+}
+
+@Article{CarpenterGabryGoodrich2017a,
+  author       = {Carpenter, Bob and Gabry, Jonah and Goodrich, Ben},
+  title        = {Hierarchical Partial Pooling for Repeated Binary Trials},
+  journaltitle = {Stan Case Studies},
+  date         = {2017-01-19},
+  url          = {http://mc-stan.org/documentation/case-studies/pool-binary-trials-rstanarm.html},
+  urldate      = {2017-04-15},
+  timestamp    = {2017-04-15},
+}
+
+@Article{Carpenter2017a,
+  author       = {Carpenter, Bob},
+  title        = {Typical Sets and the Curse of Dimensionality},
+  journaltitle = {Stan Case Studies},
+  date         = {2017},
+  timestamp    = {2017-04-15},
+}
+
+@Article{Carpenter2016a,
+  author       = {Carpenter, Bob},
+  title        = {The Impact of Reparameterization on Point Estimates},
+  journaltitle = {Stan Case Studies},
+  date         = {2016-04-25},
+  url          = {http://mc-stan.org/documentation/case-studies/mle-params.html},
+  urldate      = {2017-04-15},
+  timestamp    = {2017-04-15},
+}
+
+@Article{Betancourt2017a,
+  author       = {Betancourt, Michael},
+  title        = {How the Shape of a Weakly Informative Prior Affects Inferences},
+  journaltitle = {Stan Case Studies},
+  date         = {2017-01},
+  url          = {http://mc-stan.org/documentation/case-studies/weakly_informative_shapes.html},
+  urldate      = {2017-04-15},
+  timestamp    = {2017-04-15},
+}
+
+@Article{2016a,
+  title        = {A primer on {Bayesian} multilevel modeling using {PyStan}},
+  journaltitle = {Stan Case Studies},
+  date         = {2016},
+  url          = {http://mc-stan.org/documentation/case-studies/radon.html},
+  urldate      = {2017-04-15},
+  timestamp    = {2017-04-15},
+}
+
+@Book{Stan2016a,
+  author    = {{Stan Development Team}},
+  title     = {Stan Modeling Language Users Guide and Reference Manual, Version 2.14.0},
+  year      = {2016},
+  url       = {https://github.com/stan-dev/stan/releases/download/v2.14.0/stan-reference-2.14.0.pdf},
   urldate   = {2017-04-20},
-  timestamp = {2017-04-20},
+  timestamp = {2017.04.20},
 }
 
-@Article{,
-  author      = {Aki Vehtari and Andrew Gelman and Jonah Gabry},
-  title       = {Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC},
-  date        = {2015-07-16},
-  doi         = {10.1007/s11222-016-9696-4},
-  eprint      = {1507.04544v5},
-  eprintclass = {stat.CO},
-  eprinttype  = {arXiv},
-  abstract    = {Leave-one-out cross-validation (LOO) and the widely applicable information criterion (WAIC) are methods for estimating pointwise out-of-sample prediction accuracy from a fitted Bayesian model using the log-likelihood evaluated at the posterior simulations of the parameter values. LOO and WAIC have various advantages over simpler estimates of predictive error such as AIC and DIC but are less used in practice because they involve additional computational steps. Here we lay out fast and stable computations for LOO and WAIC that can be performed using existing simulation draws. We introduce an efficient computation of LOO using Pareto-smoothed importance sampling (PSIS), a new procedure for regularizing importance weights. Although WAIC is asymptotically equal to LOO, we demonstrate that PSIS-LOO is more robust in the finite case with weak priors or influential observations. As a byproduct of our calculations, we also obtain approximate standard errors for estimated predictive errors and for comparing of predictive errors between two models. We implement the computations in an R package called 'loo' and demonstrate using models fit with the Bayesian inference package Stan.},
-  file        = {online:http\://arxiv.org/pdf/1507.04544v5:PDF},
-  keywords    = {stat.CO, stat.ME},
+@InCollection{Geyer2011a,
+  author    = {Geyer, C. J.},
+  title     = {Introduction to Markov chain Monte Carlo},
+  booktitle = {Handbook of Markov Chain Monte Carlo},
+  year      = {2011},
+  editor    = {Brooks, S. and Gelman, A. and Jones, G. L. and Meng, X.-L.},
+  publisher = {Chapman and Hall/CRC},
+  timestamp = {2017.04.20},
 }
 
 @Comment{jabref-meta: databaseType:biblatex;}
diff --git a/data/western1995/econ_growth.tsv b/data/western1995/econ_growth.tsv
@@ -0,0 +1,16 @@
+country	econ_growth	labor_org	social_dem
+Australia	.51	1.87	30.5
+Austria	.64	3.06	100.0
+Belgium	.44	2.80	21.0
+Canada	.50	.98	.0
+Denmark	.36	2.77	75.5
+Finland	.56	2.76	40.2
+France	.57	.68	1.7
+Germany	.53	1.80	74.8
+Holland	.44	1.90	41.2
+Italy	.53	1.47	6.5
+Japan	.38	.43	.0
+Norway	1.05	3.33	100.0
+Sweden	.44	3.52	45.9
+United Kingdom	.26	1.81	86.0
+United States	.51	.82	.0
diff --git a/data/western1995/income_ineq.tsv b/data/western1995/income_ineq.tsv
@@ -0,0 +1,19 @@
+country	inequality	turnout	energy	socialism
+Argentina	2.960	61.8	1,088	2.3
+Australia	1.940	85.3	3,918	45.0
+Denmark	2.734	86.8	2,829	41.8
+Finland	4.441	82.1	1,650	24.9
+France	5.653	66.5	2,419	25.1
+Germany	3.435	77.6	3,673	27.1
+Israel	1.950	84.1	1,243	50.8
+Italy	2.196	89.2	1,135	17.0
+Japan	3.007	72.3	1,166	27.5
+Netherlands	3.457	87.9	2,691	30.8
+Norway	2.440	81.9	2,740	52.0
+Puerto Rico	3.693	73.3	1,453	0.0
+South Africa	9.410	14.3	2,338	1.8
+Sweden	3.143	78.1	3,491	48.5
+Trinidad and Tobago	3.888	64.7	1,935	18.8
+United Kingdom	2.876	72.4	4,907	48.5
+United States	2.296	56.8	8,047	0.0
+Venezuela	3.515	78.8	2,623	28.7
diff --git a/data/western1995/unionization.tsv b/data/western1995/unionization.tsv
@@ -0,0 +1,21 @@
+country	union_density	left_government	labor_force_size	econ_conc
+Sweden	82.4	111.84	3,931	1.55
+Israel	80.0	73.17	997	1.71
+Iceland	74.3	17.25	81	2.06
+Finland	73.3	59.33	2,034	1.56
+Belgium	71.9	43.25	3,348	1.52
+Denmark	69.8	90.24	2,225	1.52
+Ireland	68.1	.00	886	1.75
+Austria	65.6	48.67	2,469	1.53
+New Zealand	59.4	60.00	1,050	1.64
+Norway	58.9	83.08	1,657	1.58
+Australia	51.4	33.74	5,436	1.37
+Italy	50.6	.00	15,819	.86
+United Kingdom	48.0	43.67	25,757	1.13
+Germany	39.6	35.33	23,003	.92
+Netherlands	37.7	31.50	4,509	1.25
+Switzerland	35.4	11.87	2,460	1.68
+Canada	31.2	.00	10,516	1.35
+Japan	31.0	1.92	39,930	1.11
+France	28.2	8.67	18,846	.95
+United States	24.5	.00	92,899	1.00