-
Notifications
You must be signed in to change notification settings - Fork 1
/
fixedeffects.R
101 lines (84 loc) · 2.59 KB
/
fixedeffects.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
##### Example #####
## Show the data
require(graphics)
pairs(airquality[, 1:5], panel=panel.smooth, main="Air Quality data")
## Comparison
library(lfe)
summary(felm(Ozone ~ Solar.R + Wind + Temp | Month, data=airquality))
## Setup yy and XX inputs
yy <- airquality$Ozone
XX <- airquality[, c('Solar.R', 'Wind', 'Temp')]
groups <- airquality$Month
do.efficient <- F
##### Standard Structure #####
library(rstan)
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
## Code for a simple multivariate regression model
stan.code.simple <- "
data {
int<lower=0> N; // number of observations
int<lower=0> K; // number of predictors
matrix[N, K] x; // predictor matrix
vector[N] y; // outcome vector
int<lower=0> L; // number of FE classes
int<lower=1, upper=L> groups[N]; // the groups themselves
}
parameters {
vector[K] beta; // other coefficients
vector[L] alphas; // fixed effects
real<lower=0> sigma; // sd of errors
}
model {
y ~ normal(x * beta + alphas[groups], sigma);
}"
## This needs to be used with demeaning
stan.code.faster <- "
data {
int<lower=0> N; // number of observations
int<lower=0> K; // number of predictors
matrix[N, K] x; // predictor matrix
vector[N] y; // outcome vector
}
transformed data {
matrix[N, K] Q_ast;
matrix[K, K] R_ast;
matrix[K, K] R_ast_inverse;
// thin and scale the QR decomposition
Q_ast = qr_Q(x)[, 1:K] * sqrt(N - 1);
R_ast = qr_R(x)[1:K, ] / sqrt(N - 1);
R_ast_inverse = inverse(R_ast);
}
parameters {
real alpha; // intercept
vector[K] theta; // coefficients on Q_ast
real<lower=0> sigma; // sd of errors
}
model {
y ~ normal(Q_ast * theta + alpha, sigma);
}
generated quantities {
vector[K] beta;
beta = R_ast_inverse * theta; // coefficients on x
}"
## Drop observations with NAs
valid <- rowSums(is.na(XX)) + is.na(yy) + is.na(groups) == 0
## Prepare groups for FEs
valid.groups <- factor(groups[valid])
if (do.efficient) {
allentries <- list(yy[valid])
for (ii in 1:ncol(XX))
allentries[[ii+1]] <- XX[valid, ii]
demeaned <- demeanlist(allentries, list(valid.groups))
dmyy <- demeaned[[1]]
dmXX <- demeaned[[2]]
for (ii in 2:ncol(XX))
dmXX <- cbind(dmXX, demeaned[[ii+1]])
stan.data <- list(N=sum(valid), K=ncol(XX), x=dmXX, y=dmyy)
fit <- stan(model_code=stan.code.faster, data=stan.data)
} else {
## Setup data to send to Stan
stan.data <- list(N=sum(valid), K=ncol(XX), x=XX[valid,], y=yy[valid], L=length(levels(valid.groups)), groups=as.numeric(valid.groups))
## Fit the model
fit <- stan(model_code=stan.code.simple, data=stan.data)
}