-
Notifications
You must be signed in to change notification settings - Fork 1
/
etf_pca.R
72 lines (56 loc) · 2.75 KB
/
etf_pca.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# Daily gross return: close divided by the previous close. `lag1close` is not
# created here, so it must already be a column of `prices`. `:=` adds the
# column to `prices` by reference.
prices[, fullday_delta := close / lag1close]

# One summary row per symbol, then keep only the symbols that pass every filter:
#   v2021   - mean 2021 volume, must exceed 1,000,000
#   vol2021 - sd of the 2021 daily returns (computed, not filtered on)
#   N2021   - number of non-missing 2021 daily returns, must exceed 50
#   years   - number of distinct calendar years present, must exceed 7
# 'DWFI' is dropped explicitly.
key_etfs <- prices[
  ,
  {
    in_2021 <- year(date) == 2021
    .(
      v2021 = mean(volume[in_2021], na.rm = TRUE),
      vol2021 = sd(fullday_delta[in_2021], na.rm = TRUE),
      N2021 = sum(in_2021 & !is.na(fullday_delta), na.rm = TRUE),
      years = uniqueN(year(date))
    )
  },
  symbol
][v2021 > 1000000 & N2021 > 50 & symbol != 'DWFI' & years > 7]

# Wide table: one row per date, one column of daily returns per retained symbol.
# Duplicate (date, symbol) rows are averaged.
key_etf_wide <- dcast(
  prices[symbol %in% key_etfs$symbol & !is.na(fullday_delta)],
  date ~ symbol,
  value.var = 'fullday_delta',
  fun.aggregate = mean
)
# Largest element of `x` that is not exactly equal to 1 (used to skip the
# diagonal of a correlation matrix). An NA in `x` propagates to an NA result.
max_not_one <- function(x) {
  differs_from_one <- x != 1
  max(x[differs_from_one])
}
# Greedy de-correlation of the ETF return columns.
#
# Each pass picks the column whose strongest non-self correlation is highest
# (the "pivot"), then excludes every still-included column whose correlation
# with the pivot lies strictly between `corr_threshold` and 1. The pivot itself
# is kept. The loop ends once the selected pivot's strongest remaining
# correlation is at or below the threshold.
cols_to_exclude <- c('date')
corr_threshold <- .6
max_ <- 1

# Correlate straight from a matrix: going through data.frame() would run
# make.names() on the symbols, and any altered name would no longer match
# names(key_etf_wide) in the final listing below.
return_cols <- setdiff(names(key_etf_wide), cols_to_exclude)
corr_mat <- cor(
  as.matrix(key_etf_wide[, .SD, .SDcols = return_cols]),
  use = 'pairwise.complete.obs'
)
# Kept as a data.table (rows in the same order as the columns) because later
# code indexes it with .SD / .SDcols.
etf_corrs <- data.table(corr_mat)

while (max_ > corr_threshold) {
  keep <- !colnames(corr_mat) %in% cols_to_exclude
  # drop = FALSE keeps a matrix even if a single column is left.
  etf_corrs_incl <- corr_mat[keep, keep, drop = FALSE]

  # Undefined pairwise correlations (NA) are ignored when ranking the pivots.
  row_max <- apply(etf_corrs_incl, 1, function(r) max_not_one(r[!is.na(r)]))
  most_correlated_var_i <- which.max(row_max)
  if (length(most_correlated_var_i) == 0L) {
    break  # nothing left to compare
  }

  corrs_to_most_correlated <- etf_corrs_incl[, most_correlated_var_i]
  # NA-safe mask: an NA here would otherwise append NA to cols_to_exclude.
  too_close <- !is.na(corrs_to_most_correlated) &
    corrs_to_most_correlated > corr_threshold &
    corrs_to_most_correlated < 1
  cols_to_exclude <- c(cols_to_exclude, colnames(etf_corrs_incl)[too_close])

  # Progress output: number of ETF columns excluded so far ('date' not counted).
  print(paste('excluding:', length(setdiff(cols_to_exclude, 'date'))))

  # Strongest non-self correlation of this pass's pivot; na.rm keeps the
  # while() condition from ever seeing NA.
  max_ <- max(
    ifelse(corrs_to_most_correlated == 1, 0, corrs_to_most_correlated),
    na.rm = TRUE
  )
  print(max_)
}

# Surviving symbols, joined with "','" so the result can be pasted into c('...').
names(key_etf_wide)[!names(key_etf_wide) %in% cols_to_exclude] %>% paste(collapse = "','")
# Fit the PCA on complete pre-2022 rows of the selected ETF return columns.
# prcomp() is used because `center` / `scale.` are its arguments and because
# the residual code reads `$rotation`, `$center` and `$scale` from the fit;
# princomp() does not use those arguments and returns `loadings` instead of
# `rotation`. Rows with missing values are already removed by na.omit(), so no
# na.action is passed.
x <- prcomp(
  na.omit(key_etf_wide[year(date) < 2022, .SD, .SDcols = key_etfs$symbol]),
  center = TRUE,
  scale. = TRUE
)
#' Residuals of `dat` after removing the leading principal components.
#'
#' Standardises the model's variables in `dat` with the model's own centre and
#' scale, projects them onto the first `k` components, and returns what the
#' projection does not reproduce.
#'
#' @param dat data.table with a `date` column and one column per variable the
#'   model was fitted on.
#' @param model Fitted PCA. `$rotation` is used when present (a `prcomp` fit);
#'   otherwise `$loadings` is used (a `princomp` fit). `$center` and `$scale`
#'   are passed to `scale()`.
#' @param k Number of leading components to remove. Capped at the number of
#'   components in `model`.
#' @return Long data.table with columns `date`, `variable`, `value`, one row
#'   per date and variable, where `value` is the standardised residual.
get_pca_deviations <- function(dat, model, k = 10) {
  # Take everything from `model` rather than from globals, so the result does
  # not change when `x` or `key_etfs` are reassigned later in the script.
  rotation <- model$rotation
  if (is.null(rotation)) {
    rotation <- unclass(model$loadings)
  }
  vars <- rownames(rotation)
  stopifnot(!is.null(vars), all(vars %in% names(dat)))

  leading <- rotation[, seq_len(min(k, ncol(rotation))), drop = FALSE]
  scaled <- scale(
    as.matrix(dat[, .SD, .SDcols = vars]),
    center = model$center,
    scale = model$scale
  )

  # Scores on the leading components, mapped back into variable space.
  day_loads <- scaled %*% leading
  outs <- data.table(scaled - day_loads %*% t(leading))
  outs$date <- dat$date
  melt(outs, id.vars = 'date')
}
# Residuals of the 2022 rows against the fit currently stored in `x`, with the
# first 10 components removed.
y <- get_pca_deviations(key_etf_wide[year(date) == 2022], model = x, k = 10)
c('VT','VIXM','KBWB')
# `x` is reassigned from here on: a 10-factor psych::principal fit with promax
# rotation over all dates of the selected ETF columns.
x <- psych::principal(
  key_etf_wide[, .SD, .SDcols = key_etfs$symbol],
  nfactors = 10,
  rotate = 'promax'
)
# Loadings on the first rotated component, sorted ascending; `rn` holds the
# ETF symbol taken from the loadings' row names.
data.table(
  as.data.frame.matrix(x$loadings),
  keep.rownames = TRUE
)[order(RC1), .(rn, RC1)]
# QQQ <> PSQ -- NASDAQ 100
# VGIT <> TTT -- treasuries
# USO <> SCO -- Oil
# YINN <> YANG -- China
# ARKK <
# XLU
# GDXJ <> DUST
# SLQD
#PROMAX#
# Hand-picked representative ETFs, one per theme (vector names are the themes).
# This replaces the earlier `key_etfs` table with a named character vector.
key_etfs <- c(
  'safe large cap' = 'MGK', 'large govnt bonds' = 'IEF', 'oil' = 'USO',
  'gold' = 'RING', 'china' = 'FXI', 'tech' = 'ARKK',
  'near term bonds' = 'SLQD', 'small cap value' = 'IJS', 'utilities' = 'XLU'
)

# Absolute correlation of every ETF (rows of etf_corrs, in column order) with
# each representative ETF. Computed once and used for both the choice of
# representative and the reported strength, so the two always refer to the
# same ETF, including for inversely correlated funds.
key_abs_corrs <- abs(as.matrix(etf_corrs[, .SD, .SDcols = key_etfs]))

# Share of ETFs whose best |correlation| with a representative exceeds .5
# (V1), and the mean of that best |correlation| (V2).
data.table(
  key_etf = key_etfs[apply(key_abs_corrs, 1, which.max)],
  key_etf_corr = apply(key_abs_corrs, 1, max),
  etf_name = names(etf_corrs)
)[, .(mean(key_etf_corr > .5), mean(key_etf_corr))]