library(tidyverse)
library(lubridate)
library(scales)
library(ggrepel)
library(tidyquant)
library(jsonlite)
theme_set(theme_minimal())
invisible(Sys.setlocale("LC_TIME", "en_US.UTF-8"))

library(knitr)
library(kableExtra)
mbb <- tq_get("MBB", from = as.Date("1900-01-01")) 
agg <- tq_get("AGG", from = as.Date("1900-01-01")) 
rem <- tq_get("REM", from = as.Date("1900-01-01")) 
usrt <- tq_get("USRT", from = as.Date("1900-01-01")) 
qclr <- tq_get("QCLR", from = as.Date("1900-01-01")) 
qqq <- tq_get("QQQ", from = as.Date("1900-01-01")) 
qqqm <- tq_get("QQQM", from = as.Date("1900-01-01")) 
xclr <- tq_get("XCLR", from = as.Date("1900-01-01")) 
roro <- tq_get("RORO", from = as.Date("1900-01-01")) 
jojo <- tq_get("JOJO", from = as.Date("1900-01-01")) 
tlt <- tq_get("TLT", from = as.Date("1900-01-01")) 
spy <- tq_get("SPY", from = as.Date("1900-01-01")) 
stnc <- tq_get("STNC", from = as.Date("1900-01-01")) 
rpar <- tq_get("RPAR", from = as.Date("1900-01-01")) 
boxx <- tq_get("BOXX", from = as.Date("1900-01-01")) 
sdhy <- tq_get("SDHY", from = as.Date("1900-01-01")) 
sgov <- tq_get("SGOV", from = as.Date("1900-01-01")) 
bil <- tq_get("BIL", from = as.Date("1900-01-01")) 
atacx <- tq_get("ATACX", from = as.Date("1900-01-01")) 

etfs <- bind_rows(mbb, agg, rem, usrt, qclr, qqq, qqqm, xclr, roro, jojo, tlt, spy, stnc, rpar, boxx, sdhy, sgov, bil, atacx)

saveRDS(etfs, "etfs/etfs.RDS")
etfs <- readRDS("etfs/etfs.RDS")

rets <- etfs |> 
  group_by(symbol) |> 
  transmute(symbol, date, r = adjusted/lag(adjusted)-1) |> 
  drop_na() |> 
  ungroup()
rets |> 
  group_by(symbol) |>
  summarise(min_date = min(date)) |> 
  ggplot(aes(x = min_date, y = reorder(symbol, min_date))) +
  geom_label(aes(label = symbol), size = 3) +
  scale_x_date(date_breaks = "2 year", date_labels = "%Y") +
  labs(title = "ETFs", x = "First available date", y = NULL)

Principal Component Analysis

rets_pca <- rets |> 
  pivot_wider(names_from = symbol, values_from = r) |> 
  arrange(date) |> 
  filter(date >= as.Date("2021-08-27")) |>
  select(-BOXX) |> 
  select(-date)

pca <- prcomp(rets_pca, scale. = T, center = T)

rots <- pca$rotation |> 
  as.data.frame() |> 
  rownames_to_column("symbol") |> 
  as_tibble() |> 
  mutate_if(is.numeric, round, 2)
# Get max absolute value for scaling
max_abs <- max(abs(rots[,-1]))

# Function to generate transparent-to-green gradient
get_color <- function(value, max_value) {
  intensity <- rescale(abs(value), to = c(0, 1), from = c(0, max_value))  
  colorRampPalette(c("white", "green"))(100)[round(intensity * 99) + 1]  # Pick color based on scaled intensity
}

# Apply bold & color gradient
rots_bold <- rots %>%
  mutate(across(-symbol, ~ cell_spec(as.character(.), 
                                     bold = abs(.) == max(abs(.)),  
                                     background = get_color(., max_abs))))


# Render the table
rots_bold |> 
  knitr::kable(format = "html", escape = FALSE) |> 
  kableExtra::kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover"))
symbol PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 PC13 PC14 PC15 PC16 PC17 PC18
MBB -0.23 -0.38 0.02 0.1 -0.03 -0.05 -0.11 0.11 -0.02 0.22 0.25 0.13 0.5 0.1 -0.34 0.12 0.5 0
AGG -0.22 -0.41 0.02 0.12 -0.05 -0.03 -0.1 0.06 0.06 0.09 0.15 0.09 0.25 0 0.05 -0.23 -0.77 0
REM -0.27 0.06 0.03 -0.37 0.25 -0.25 -0.04 0.14 -0.54 0.23 0.04 -0.5 0.05 0.15 0.17 0.02 -0.04 0
USRT -0.27 0.07 0.02 -0.36 0.21 -0.35 -0.19 0.26 0.14 -0.25 -0.32 0.37 0.1 -0.42 -0.04 0.08 0.01 0
QCLR -0.24 0.2 -0.02 0.39 -0.25 0.12 -0.03 0.28 -0.55 -0.1 -0.34 0.34 -0.03 0.22 -0.02 -0.03 -0.02 0
QQQ -0.29 0.26 -0.03 0.17 -0.24 0.05 -0.01 -0.01 0.12 0.05 0.11 -0.21 0.11 -0.29 0.09 0.28 -0.05 0.7
QQQM -0.29 0.26 -0.03 0.17 -0.24 0.06 -0.01 0 0.11 0.05 0.11 -0.21 0.11 -0.29 0.09 0.27 -0.04 -0.71
XCLR -0.14 0.15 -0.04 0.54 0.8 0.01 -0.04 -0.13 0.06 0.06 -0.01 0 0 -0.02 0.01 0 0 0
RORO -0.25 -0.05 -0.01 0.05 -0.15 -0.44 0.21 -0.76 -0.11 -0.13 -0.18 0.05 0.03 0.07 -0.14 0 -0.01 0
JOJO -0.22 -0.33 0.02 0.02 0.05 0.31 0.2 0.13 0.17 -0.34 -0.48 -0.49 0.01 0.04 -0.27 0.01 0.01 0
TLT -0.18 -0.45 0.04 0.15 -0.05 -0.03 -0.04 -0.02 0.04 -0.09 -0.04 0.03 -0.19 -0.05 0.77 -0.02 0.31 0
SPY -0.3 0.26 -0.01 0 -0.11 -0.03 -0.04 0.05 0.2 0.02 0.11 -0.11 -0.04 -0.04 -0.05 -0.84 0.23 0.01
STNC -0.29 0.21 0.02 -0.12 0 -0.1 -0.07 0.09 0.44 -0.14 0.1 0.08 -0.06 0.74 0.1 0.23 -0.06 0
RPAR -0.29 -0.22 0.05 0.03 -0.02 -0.04 -0.11 0.07 -0.04 0.15 0.23 0.06 -0.78 -0.11 -0.37 0.12 -0.02 0
SDHY -0.23 0.04 0.01 -0.33 0.08 0.65 -0.43 -0.43 -0.1 0.05 -0.06 0.16 0.04 0 0.04 0 0 0
SGOV -0.02 -0.05 -0.7 -0.03 0.05 0.02 -0.05 0.01 -0.18 -0.56 0.38 -0.04 0.01 -0.02 -0.03 0 0.01 0
BIL -0.02 -0.05 -0.7 -0.05 -0.05 -0.04 0.03 0.02 0.18 0.56 -0.38 0.03 -0.05 0.04 0.03 0.01 0 0
ATACX -0.25 0.03 0 -0.23 0.15 0.26 0.8 0.05 -0.04 0.09 0.19 0.3 0.02 -0.07 0.08 0.01 -0.01 0
library(factoextra)

pca %>% fviz_eig()

pca %>% fviz_pca_biplot()

Linear models

rets_lm <- rets |> 
  pivot_wider(names_from = symbol, values_from = r) |> 
  arrange(date) |> 
  filter(date >= as.Date("2021-08-27")) |>
  select(-BOXX) |> 
  select(-date)

model_mbb <- lm(MBB ~ SPY + TLT + SGOV, data = rets_lm)
model_agg <- lm(AGG ~ SPY + TLT + SGOV, data = rets_lm)
model_rem <- lm(REM ~ SPY + TLT + SGOV, data = rets_lm)
model_usrt <- lm(USRT ~ SPY + TLT + SGOV, data = rets_lm)
model_qclr <- lm(QCLR ~ SPY + TLT + SGOV, data = rets_lm)
model_qqq <- lm(QQQ ~ SPY + TLT + SGOV, data = rets_lm)
model_qqqm <- lm(QQQM ~ SPY + TLT + SGOV, data = rets_lm)
model_xclr <- lm(XCLR ~ SPY + TLT + SGOV, data = rets_lm)
model_roro <- lm(RORO ~ SPY + TLT + SGOV, data = rets_lm)
model_jojo <- lm(JOJO ~ SPY + TLT + SGOV, data = rets_lm)
model_stnc <- lm(STNC ~ SPY + TLT + SGOV, data = rets_lm)
model_rpar <- lm(RPAR ~ SPY + TLT + SGOV, data = rets_lm)
model_sdhy <- lm(SDHY ~ SPY + TLT + SGOV, data = rets_lm)
model_bil <- lm(BIL ~ SPY, data = rets_lm)
model_atacx <- lm(ATACX ~ SPY + TLT + SGOV, data = rets_lm)

library(stargazer)

stargazer(model_mbb, model_agg, model_rem, model_usrt, model_qclr, model_qqq, 
          model_qqqm, model_xclr, model_roro, model_jojo, model_stnc, model_rpar, 
          model_sdhy, model_bil, model_atacx, type = "html", 
          title = "Linear models", out = "lm.html")
Linear models
Dependent variable:
MBB AGG REM USRT QCLR QQQ QQQM XCLR RORO JOJO STNC RPAR SDHY BIL ATACX
(1) (2) (3) (4) (5) (6) (7) (8) (9) (10) (11) (12) (13) (14) (15)
SPY 0.085*** 0.065*** 0.965*** 0.801*** 0.536*** 1.283*** 1.278*** 0.550*** 0.689*** 0.158*** 0.800*** 0.378*** 0.335*** 0.0003 0.719***
(0.008) (0.005) (0.036) (0.028) (0.018) (0.014) (0.014) (0.043) (0.033) (0.015) (0.013) (0.012) (0.017) (0.001) (0.034)
TLT 0.357*** 0.350*** 0.288*** 0.240*** 0.042** 0.006 0.005 -0.010 0.524*** 0.525*** 0.054*** 0.522*** 0.115*** 0.270***
(0.008) (0.005) (0.035) (0.027) (0.017) (0.014) (0.014) (0.042) (0.032) (0.015) (0.013) (0.012) (0.017) (0.033)
SGOV 0.384 0.743** 0.085 0.404 0.832 1.026 1.034 3.889 1.719 0.970 -1.009 -0.670 1.039 1.250
(0.524) (0.333) (2.394) (1.846) (1.170) (0.962) (0.955) (2.890) (2.184) (0.993) (0.888) (0.826) (1.162) (2.232)
Constant -0.00001 -0.0001 -0.0004 -0.0002 0.00000 -0.0002 -0.0002 -0.001 -0.001 -0.0001 0.00001 -0.00000 -0.0001 0.0001*** -0.001
(0.0001) (0.0001) (0.001) (0.0004) (0.0003) (0.0002) (0.0002) (0.001) (0.0005) (0.0002) (0.0002) (0.0002) (0.0002) (0.00001) (0.0005)
Observations 865 865 865 865 865 865 865 865 865 865 865 865 865 865 865
R2 0.733 0.864 0.488 0.525 0.525 0.902 0.903 0.159 0.469 0.631 0.810 0.776 0.336 0.0004 0.392
Adjusted R2 0.732 0.864 0.486 0.524 0.523 0.902 0.902 0.156 0.468 0.630 0.809 0.775 0.333 -0.001 0.390
Residual Std. Error 0.002 (df = 861) 0.002 (df = 861) 0.011 (df = 861) 0.009 (df = 861) 0.006 (df = 861) 0.005 (df = 861) 0.005 (df = 861) 0.014 (df = 861) 0.010 (df = 861) 0.005 (df = 861) 0.004 (df = 861) 0.004 (df = 861) 0.005 (df = 861) 0.0002 (df = 863) 0.011 (df = 861)
F Statistic 786.806*** (df = 3; 861) 1,827.726*** (df = 3; 861) 273.492*** (df = 3; 861) 317.735*** (df = 3; 861) 316.867*** (df = 3; 861) 2,640.570*** (df = 3; 861) 2,661.046*** (df = 3; 861) 54.172*** (df = 3; 861) 253.932*** (df = 3; 861) 491.278*** (df = 3; 861) 1,220.939*** (df = 3; 861) 991.772*** (df = 3; 861) 145.026*** (df = 3; 861) 0.304 (df = 1; 863) 185.030*** (df = 3; 861)
Note: p<0.1; p<0.05; p<0.01
# annualized alphas
alphas <- rets_lm |> 
  select(-c(SPY, TLT, SGOV)) |>
  map(~ {
    model <- lm(.x ~ SPY + TLT + SGOV, data = rets_lm)
    (1+coef(summary(model))[1, 1])^252-1
  }) |> 
  unlist()

ggplot(data.frame(symbol = names(alphas), alpha = alphas), aes(x = alpha, y = reorder(symbol, alpha))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
  geom_point() +
  geom_label(aes(label = scales::percent(alpha, accuracy = 0.01)), nudge_x = 0.01) +
  scale_x_continuous(labels = scales::percent) +
  labs(title = "Annualized alphas (since 2021-08-27)", x = NULL, y = NULL)

rets_lm2023 <- rets_lm <- rets |> 
  pivot_wider(names_from = symbol, values_from = r) |> 
  arrange(date) |> 
  filter(date >= as.Date("2023-01-01")) |> 
  select(-date)
  
# annualized alphas
alphas <- rets_lm2023 |> 
  select(-SPY) |> 
  map(~ {
    model <- lm(.x ~ SPY + TLT + SGOV, data = rets_lm2023)
    (1+coef(summary(model))[1, 1])^252-1
  }) |> 
  unlist()

ggplot(data.frame(symbol = names(alphas), alpha = alphas), aes(x = alpha, y = reorder(symbol, alpha))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
  geom_point() +
  geom_label(aes(label = scales::percent(alpha, accuracy = 0.01)), nudge_x = 0.01) +
  scale_x_continuous(labels = scales::percent) +
  labs(title = "Annualized alphas (since 2023-01-01)", x = NULL, y = NULL)

rets_lm2024 <- rets_lm <- rets |> 
  pivot_wider(names_from = symbol, values_from = r) |> 
  arrange(date) |> 
  filter(date >= as.Date("2024-01-01")) |> 
  select(-date)
  
# annualized alphas
alphas <- rets_lm2024 |> 
  select(-SPY) |> 
  map(~ {
    model <- lm(.x ~ SPY + TLT + SGOV, data = rets_lm2024)
    (1+coef(summary(model))[1, 1])^252-1
  }) |> 
  unlist()

ggplot(data.frame(symbol = names(alphas), alpha = alphas), aes(x = alpha, y = reorder(symbol, alpha))) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
  geom_point() +
  geom_label(aes(label = scales::percent(alpha, accuracy = 0.01)), nudge_x = 0.01) +
  scale_x_continuous(labels = scales::percent) +
  labs(title = "Annualized alphas (since 2024-01-01)", x = NULL, y = NULL)