Q1: Import ETF Data

library(tidyquant)
library(lubridate)
library(timetk)
library(purrr)
library(tibble)
library(dplyr)
library(quadprog)
library(ggplot2)
library(tidyr)
library(xts)

tickers <- c("SPY","QQQ","EEM","IWM","EFA","TLT","IYR","GLD")

raw <- tq_get(tickers,
              from = "2010-01-01",
              to   = Sys.Date(),
              get  = "stock.prices")

# Wide tibble of adjusted prices
prices_tbl <- raw %>%
  select(symbol, date, adjusted) %>%
  pivot_wider(names_from = symbol, values_from = adjusted) %>%
  arrange(date)

# Build xts explicitly — extract numeric matrix + Date index
prices_xts <- xts(
  x     = as.matrix(prices_tbl[ , tickers]),   # numeric columns only
  order.by = as.Date(prices_tbl$date)
)
storage.mode(prices_xts) <- "double"           # ensure double, not list

head(prices_xts)
##                 SPY      QQQ      EEM      IWM      EFA      TLT      IYR
## 2010-01-04 84.79640 40.29080 30.35151 51.36655 35.12843 55.70950 26.76811
## 2010-01-05 85.02082 40.29080 30.57180 51.18993 35.15941 56.06935 26.83239
## 2010-01-06 85.08069 40.04776 30.63576 51.14177 35.30801 55.31871 26.82070
## 2010-01-07 85.43985 40.07379 30.45811 51.51912 35.17179 55.41175 27.06028
## 2010-01-08 85.72417 40.40363 30.69972 51.80010 35.45043 55.38699 26.87913
## 2010-01-11 85.84387 40.23872 30.63576 51.59135 35.74147 55.08301 27.00767
##               GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
tail(prices_xts)
##               SPY    QQQ   EEM    IWM    EFA   TLT    IYR    GLD
## 2026-06-02 759.57 746.16 70.80 291.66 105.02 85.65  99.99 411.95
## 2026-06-03 754.24 744.21 69.92 287.67 104.12 85.31 100.00 407.87
## 2026-06-04 757.09 740.61 69.10 292.01 104.95 85.50 101.79 411.27
## 2026-06-05 737.55 705.06 64.59 281.65 102.26 85.06 102.54 396.24
## 2026-06-08 739.22 716.07 65.75 284.11 102.88 84.62 101.08 397.27
## 2026-06-09 737.05 707.83 65.82 285.02 102.90 85.12 103.49 390.78

Q2: Weekly and Monthly Simple Returns

# Simple return function: last price / first price - 1
simple_ret <- function(x) {
  apply(x, 2, function(col) col[length(col)] / col[1] - 1)
}

weekly_ret  <- apply.weekly(prices_xts,  simple_ret)
monthly_ret <- apply.monthly(prices_xts, simple_ret)

head(weekly_ret)
##                    SPY          QQQ         EEM          IWM          EFA
## 2010-01-08  0.01094109  0.002800229  0.01147255  0.008440203  0.009166324
## 2010-01-15 -0.00950023 -0.011000880 -0.02690761 -0.009025638 -0.011607773
## 2010-01-22 -0.05084316 -0.052157269 -0.07453269 -0.048110651 -0.064919557
## 2010-01-29 -0.02168152 -0.034303656 -0.04060194 -0.027346137 -0.037594241
## 2010-02-05 -0.02200620 -0.006472222 -0.05367579 -0.025484808 -0.036676827
## 2010-02-12  0.02030434  0.025545225  0.04371427  0.039877305  0.025564866
##                     TLT          IYR         GLD
## 2010-01-08 -0.005789203  0.004147369  0.01429872
## 2010-01-15  0.025676468 -0.011033914 -0.01763401
## 2010-01-22  0.012991873 -0.060489016 -0.03900644
## 2010-01-29  0.007970863 -0.015639456 -0.01414221
## 2010-02-05  0.009106177 -0.014699176 -0.03387170
## 2010-02-12 -0.020737426  0.015500282  0.02883506
head(monthly_ret)
##                     SPY         QQQ          EEM         IWM         EFA
## 2010-01-29 -0.052413555 -0.07819917 -0.103722796 -0.06048732 -0.07491626
## 2010-02-26  0.015404464  0.03467413 -0.008903633  0.03255515 -0.01534469
## 2010-03-31  0.049975723  0.06169173  0.063099437  0.05771658  0.05562872
## 2010-04-30  0.008574028  0.02242545 -0.027071305  0.04705514 -0.04493576
## 2010-05-28 -0.091233875 -0.08672176 -0.098864825 -0.09568721 -0.11824828
## 2010-06-30 -0.035514642 -0.05101632  0.004468037 -0.04856824 -0.01079274
##                     TLT         IYR          GLD
## 2010-01-29  0.027836790 -0.05195409 -0.034972713
## 2010-02-26  0.005704983  0.03573005  0.009967714
## 2010-03-31 -0.020145070  0.08633625 -0.004386396
## 2010-04-30  0.035749667  0.05898811  0.046254293
## 2010-05-28  0.052459973 -0.08516431  0.027218472
## 2010-06-30  0.050595019 -0.02782228  0.014761042

Q3: Monthly Returns as Tibble

monthly_tbl <- tk_tbl(monthly_ret, rename_index = "date") %>%
  mutate(date = floor_date(as.Date(date), "month"))

head(monthly_tbl)
## # A tibble: 6 × 9
##   date            SPY     QQQ      EEM     IWM     EFA      TLT     IYR      GLD
##   <date>        <dbl>   <dbl>    <dbl>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>
## 1 2010-01-01 -0.0524  -0.0782 -0.104   -0.0605 -0.0749  0.0278  -0.0520 -0.0350 
## 2 2010-02-01  0.0154   0.0347 -0.00890  0.0326 -0.0153  0.00570  0.0357  0.00997
## 3 2010-03-01  0.0500   0.0617  0.0631   0.0577  0.0556 -0.0201   0.0863 -0.00439
## 4 2010-04-01  0.00857  0.0224 -0.0271   0.0471 -0.0449  0.0357   0.0590  0.0463 
## 5 2010-05-01 -0.0912  -0.0867 -0.0989  -0.0957 -0.118   0.0525  -0.0852  0.0272 
## 6 2010-06-01 -0.0355  -0.0510  0.00447 -0.0486 -0.0108  0.0506  -0.0278  0.0148

Q4: Fama-French 3-Factor Data

# Download FF3 monthly data
ff3_raw <- tq_get("F-F_Research_Data_Factors",
                   get  = "economic.data",   # tidyquant has FF via frenchdata pkg
                   from = "2010-01-01")

# Alternative: use frenchdata package
if (!requireNamespace("frenchdata", quietly = TRUE)) {
  install.packages("frenchdata")
}
library(frenchdata)

ff3_data <- download_french_data("Fama/French 3 Factors")
ff3_monthly <- ff3_data$subsets$data[[1]] %>%      # Monthly data
  mutate(date = as.Date(paste0(date, "01"), "%Y%m%d"),
         date = floor_date(date, "month")) %>%
  filter(date >= "2010-01-01") %>%
  mutate(across(c(`Mkt-RF`, SMB, HML, RF), ~ as.numeric(.) / 100))

head(ff3_monthly)
## # A tibble: 6 × 5
##   date       `Mkt-RF`     SMB     HML     RF
##   <date>        <dbl>   <dbl>   <dbl>  <dbl>
## 1 2010-01-01  -0.0335  0.0043  0.0033 0     
## 2 2010-02-01   0.0339  0.0118  0.0318 0     
## 3 2010-03-01   0.063   0.0146  0.0219 0.0001
## 4 2010-04-01   0.0199  0.0484  0.0296 0.0001
## 5 2010-05-01  -0.079   0.0013 -0.0248 0.0001
## 6 2010-06-01  -0.0556 -0.0179 -0.0473 0.0001

Q5: Merge Monthly Returns with FF3 Factors

merged_tbl <- monthly_tbl %>%
  inner_join(ff3_monthly, by = "date")

head(merged_tbl)
## # A tibble: 6 × 13
##   date            SPY     QQQ      EEM     IWM     EFA      TLT     IYR      GLD
##   <date>        <dbl>   <dbl>    <dbl>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>
## 1 2010-01-01 -0.0524  -0.0782 -0.104   -0.0605 -0.0749  0.0278  -0.0520 -0.0350 
## 2 2010-02-01  0.0154   0.0347 -0.00890  0.0326 -0.0153  0.00570  0.0357  0.00997
## 3 2010-03-01  0.0500   0.0617  0.0631   0.0577  0.0556 -0.0201   0.0863 -0.00439
## 4 2010-04-01  0.00857  0.0224 -0.0271   0.0471 -0.0449  0.0357   0.0590  0.0463 
## 5 2010-05-01 -0.0912  -0.0867 -0.0989  -0.0957 -0.118   0.0525  -0.0852  0.0272 
## 6 2010-06-01 -0.0355  -0.0510  0.00447 -0.0486 -0.0108  0.0506  -0.0278  0.0148 
## # ℹ 4 more variables: `Mkt-RF` <dbl>, SMB <dbl>, HML <dbl>, RF <dbl>
dim(merged_tbl)
## [1] 196  13

Q6: CAPM-Based GMV Portfolio (Single Period: 2010/02–2015/01)

# Helper: compute GMV weights via quadprog
gmv_weights <- function(cov_mat) {
  n <- ncol(cov_mat)
  Dmat <- 2 * cov_mat
  dvec <- rep(0, n)
  # Constraints: sum(w) = 1, w >= 0 (long only)
  Amat <- cbind(rep(1, n), diag(n))
  bvec <- c(1, rep(0, n))
  sol  <- solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
  sol$solution
}

# Helper: CAPM covariance matrix
# Cov(i,j) = beta_i * beta_j * Var(Rm) + [if i==j: sigma_eps_i^2]
capm_cov <- function(ret_mat, mkt_excess) {
  n <- ncol(ret_mat)
  betas    <- numeric(n)
  residvar <- numeric(n)
  var_mkt  <- var(mkt_excess)
  for (i in seq_len(n)) {
    fit       <- lm(ret_mat[, i] ~ mkt_excess)
    betas[i]    <- coef(fit)[2]
    residvar[i] <- var(resid(fit))
  }
  # Cov matrix
  cov_mat <- betas %o% betas * var_mkt
  diag(cov_mat) <- diag(cov_mat) + residvar
  colnames(cov_mat) <- rownames(cov_mat) <- colnames(ret_mat)
  cov_mat
}

# Training window: 2010/02 – 2015/01 (60 months)
train <- merged_tbl %>%
  filter(date >= "2010-02-01", date <= "2015-01-01")

ret_mat_train <- train %>% select(all_of(tickers)) %>% as.matrix()
mkt_ex_train  <- train$`Mkt-RF`

cov_capm  <- capm_cov(ret_mat_train, mkt_ex_train)
w_capm    <- gmv_weights(cov_capm)
names(w_capm) <- tickers

cat("CAPM GMV Weights (2015/01):\n")
## CAPM GMV Weights (2015/01):
print(round(w_capm, 4))
##    SPY    QQQ    EEM    IWM    EFA    TLT    IYR    GLD 
## 0.2607 0.1035 0.0052 0.0000 0.0347 0.4598 0.0578 0.0783
# Realized return 2015/02
ret_201502 <- merged_tbl %>%
  filter(date == "2015-02-01") %>%
  select(all_of(tickers)) %>%
  as.matrix()

realized_capm <- sum(w_capm * ret_201502)
cat(sprintf("\nRealized portfolio return (CAPM GMV) on 2015/02: %.4f (%.2f%%)\n",
            realized_capm, realized_capm * 100))
## 
## Realized portfolio return (CAPM GMV) on 2015/02: -0.0122 (-1.22%)

Q7: FF3-Based GMV Portfolio (Single Period)

# FF3 covariance matrix
ff3_cov <- function(ret_mat, factors_mat) {
  n       <- ncol(ret_mat)
  k       <- ncol(factors_mat)  # 3 factors
  betas   <- matrix(0, n, k)
  residvar <- numeric(n)
  for (i in seq_len(n)) {
    fit          <- lm(ret_mat[, i] ~ factors_mat)
    betas[i, ]   <- coef(fit)[-1]
    residvar[i]  <- var(resid(fit))
  }
  F_cov   <- cov(factors_mat)
  cov_sys <- betas %*% F_cov %*% t(betas)
  cov_mat <- cov_sys
  diag(cov_mat) <- diag(cov_mat) + residvar
  colnames(cov_mat) <- rownames(cov_mat) <- colnames(ret_mat)
  cov_mat
}

factors_train <- train %>%
  select(`Mkt-RF`, SMB, HML) %>%
  as.matrix()

cov_ff3   <- ff3_cov(ret_mat_train, factors_train)
w_ff3     <- gmv_weights(cov_ff3)
names(w_ff3) <- tickers

cat("FF3 GMV Weights (2015/01):\n")
## FF3 GMV Weights (2015/01):
print(round(w_ff3, 4))
##    SPY    QQQ    EEM    IWM    EFA    TLT    IYR    GLD 
## 0.3279 0.0282 0.0000 0.0289 0.0214 0.4688 0.0640 0.0608
realized_ff3 <- sum(w_ff3 * ret_201502)
cat(sprintf("\nRealized portfolio return (FF3 GMV) on 2015/02: %.4f (%.2f%%)\n",
            realized_ff3, realized_ff3 * 100))
## 
## Realized portfolio return (FF3 GMV) on 2015/02: -0.0132 (-1.32%)

Q8: Rolling Backtest — GMV Portfolios (2015/02 – 2026/05)

# All months in the dataset
all_dates <- merged_tbl$date

# Rolling window: 60 months prior to each rebalance date
backtest_start <- as.Date("2015-02-01")
backtest_end   <- as.Date("2026-05-01")

rebalance_dates <- all_dates[all_dates >= backtest_start &
                               all_dates <= backtest_end]

results <- map_dfr(rebalance_dates, function(t) {
  # Training window: 60 months ending at t-1
  train_end   <- all_dates[all_dates < t]
  if (length(train_end) < 60) return(NULL)
  train_start <- tail(train_end, 60)[1]

  win <- merged_tbl %>%
    filter(date >= train_start, date <= tail(train_end, 1))

  r_mat  <- win %>% select(all_of(tickers)) %>% as.matrix()
  f_mat  <- win %>% select(`Mkt-RF`, SMB, HML) %>% as.matrix()
  mkt_ex <- win$`Mkt-RF`

  # Compute covariances and weights
  cov_c  <- tryCatch(capm_cov(r_mat, mkt_ex),  error = function(e) NULL)
  cov_f  <- tryCatch(ff3_cov(r_mat, f_mat),     error = function(e) NULL)
  if (is.null(cov_c) || is.null(cov_f)) return(NULL)

  w_c <- tryCatch(gmv_weights(cov_c), error = function(e) NULL)
  w_f <- tryCatch(gmv_weights(cov_f), error = function(e) NULL)
  if (is.null(w_c) || is.null(w_f)) return(NULL)

  # Realized return at t
  ret_t <- merged_tbl %>%
    filter(date == t) %>%
    select(all_of(tickers)) %>%
    as.matrix()
  if (nrow(ret_t) == 0) return(NULL)

  tibble(
    date         = t,
    ret_capm     = sum(w_c * ret_t),
    ret_ff3      = sum(w_f * ret_t)
  )
})

# Cumulative returns
results <- results %>%
  mutate(
    cum_capm = cumprod(1 + ret_capm) - 1,
    cum_ff3  = cumprod(1 + ret_ff3)  - 1
  )

# Plot
results %>%
  select(date, cum_capm, cum_ff3) %>%
  pivot_longer(-date, names_to = "Model", values_to = "Cumulative_Return") %>%
  mutate(Model = recode(Model,
                        cum_capm = "CAPM GMV",
                        cum_ff3  = "FF3 GMV")) %>%
  ggplot(aes(x = date, y = Cumulative_Return, color = Model)) +
  geom_line(linewidth = 1) +
  scale_y_continuous(labels = scales::percent) +
  labs(title    = "Cumulative Returns: GMV Portfolios (2015/02 – 2026/05)",
       subtitle = "Rolling 60-month estimation window, rebalanced monthly",
       x = "Date", y = "Cumulative Return",
       color = "Model") +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")