Load Libraries

library(tidyquant)
library(lubridate)
library(timetk)
library(purrr)
library(tidyverse)
library(quadprog)
library(PerformanceAnalytics)

Q1: Import ETF Data

tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")

etf_raw <- tq_get(tickers,
                  from = "2010-01-01",
                  to   = Sys.Date(),
                  get  = "stock.prices")

# Extract adjusted closing prices as xts
prices_tbl <- etf_raw %>%
  select(symbol, date, adjusted) %>%
  pivot_wider(names_from = symbol, values_from = adjusted) %>%
  arrange(date)

prices_xts <- prices_tbl %>%
  tk_xts(date_var = date)

# Preview
head(prices_xts)
##                 SPY      QQQ      EEM      IWM      EFA      TLT      IYR
## 2010-01-04 84.79635 40.29080 30.35151 51.36655 35.12844 55.70950 26.76810
## 2010-01-05 85.02084 40.29080 30.57182 51.18994 35.15939 56.06932 26.83237
## 2010-01-06 85.08070 40.04777 30.63576 51.14177 35.30801 55.31874 26.82069
## 2010-01-07 85.43984 40.07380 30.45810 51.51910 35.17179 55.41179 27.06027
## 2010-01-08 85.72417 40.40363 30.69972 51.80011 35.45043 55.38700 26.87914
## 2010-01-11 85.84389 40.23870 30.63576 51.59136 35.74147 55.08303 27.00768
##               GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
tail(prices_xts)
##               SPY    QQQ   EEM    IWM    EFA   TLT    IYR    GLD
## 2026-06-02 759.57 746.16 70.80 291.66 105.02 85.65  99.99 411.95
## 2026-06-03 754.24 744.21 69.92 287.67 104.12 85.31 100.00 407.87
## 2026-06-04 757.09 740.61 69.10 292.01 104.95 85.50 101.79 411.27
## 2026-06-05 737.55 705.06 64.59 281.65 102.26 85.06 102.54 396.24
## 2026-06-08 739.22 716.07 65.75 284.11 102.88 84.62 101.08 397.27
## 2026-06-09 737.05 707.83 65.82 285.02 102.90 85.12 103.49 390.78

Q2: Calculate Weekly and Monthly Returns (Simple Returns)

# Weekly returns
weekly_returns_xts <- apply.weekly(prices_xts, function(x) {
  (last(x) - first(x)) / first(x)
})

# Monthly returns
monthly_returns_xts <- apply.monthly(prices_xts, function(x) {
  (last(x) - first(x)) / first(x)
})

head(weekly_returns_xts)
##                     SPY          QQQ         EEM          IWM          EFA
## 2010-01-08  0.010941725  0.002800514  0.01147262  0.008440352  0.009166214
## 2010-01-15 -0.009500583 -0.011000693 -0.02690749 -0.009025636 -0.011607773
## 2010-01-22 -0.050842983 -0.052157180 -0.07453253 -0.048111080 -0.064919564
## 2010-01-29 -0.021681710 -0.034303660 -0.04060137 -0.027346596 -0.037594246
## 2010-02-05 -0.022006388 -0.006472422 -0.05367618 -0.025485579 -0.036676370
## 2010-02-12  0.020304236  0.025545315  0.04371471  0.039877302  0.025564553
##                     TLT         IYR         GLD
## 2010-01-08 -0.005788860  0.00414794  0.01429872
## 2010-01-15  0.025674933 -0.01103391 -0.01763401
## 2010-01-22  0.012992347 -0.06048839 -0.03900644
## 2010-01-29  0.007972009 -0.01563865 -0.01414221
## 2010-02-05  0.009105912 -0.01469903 -0.03387170
## 2010-02-12 -0.020737685  0.01549990  0.02883506
head(monthly_returns_xts)
##                     SPY         QQQ          EEM         IWM         EFA
## 2010-01-29 -0.052413138 -0.07819918 -0.103722746 -0.06048769 -0.07491647
## 2010-02-26  0.015404275  0.03467360 -0.008903971  0.03255490 -0.01534423
## 2010-03-31  0.049976657  0.06169162  0.063099488  0.05771679  0.05562921
## 2010-04-30  0.008573766  0.02242527 -0.027070395  0.04705571 -0.04493609
## 2010-05-28 -0.091233730 -0.08672108 -0.098864990 -0.09568651 -0.11824811
## 2010-06-30 -0.035514730 -0.05101661  0.004468109 -0.04856803 -0.01079319
##                     TLT         IYR          GLD
## 2010-01-29  0.027837474 -0.05195346 -0.034972713
## 2010-02-26  0.005704851  0.03573048  0.009967714
## 2010-03-31 -0.020144066  0.08633654 -0.004386396
## 2010-04-30  0.035750402  0.05898850  0.046254293
## 2010-05-28  0.052460039 -0.08516514  0.027218472
## 2010-06-30  0.050593359 -0.02782216  0.014761042

Q3: Convert Monthly Returns to Tibble

monthly_returns_tbl <- monthly_returns_xts %>%
  tk_tbl(rename_index = "date") %>%
  mutate(date = as.yearmon(date))

head(monthly_returns_tbl)
## # A tibble: 6 × 9
##   date           SPY     QQQ      EEM     IWM     EFA      TLT     IYR      GLD
##   <yearmon>    <dbl>   <dbl>    <dbl>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>
## 1 Jan 2010  -0.0524  -0.0782 -0.104   -0.0605 -0.0749  0.0278  -0.0520 -0.0350 
## 2 Feb 2010   0.0154   0.0347 -0.00890  0.0326 -0.0153  0.00570  0.0357  0.00997
## 3 Mar 2010   0.0500   0.0617  0.0631   0.0577  0.0556 -0.0201   0.0863 -0.00439
## 4 Apr 2010   0.00857  0.0224 -0.0271   0.0471 -0.0449  0.0358   0.0590  0.0463 
## 5 May 2010  -0.0912  -0.0867 -0.0989  -0.0957 -0.118   0.0525  -0.0852  0.0272 
## 6 Jun 2010  -0.0355  -0.0510  0.00447 -0.0486 -0.0108  0.0506  -0.0278  0.0148

Q4: Download Fama-French 3-Factor Data

# Download directly from Ken French's data library
ff3_url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"

temp <- tempfile()
download.file(ff3_url, temp, mode = "wb")

# Unzip and read
csv_file <- unzip(temp, exdir = tempdir())
ff3_raw  <- read.csv(csv_file[1], skip = 3, stringsAsFactors = FALSE)

# Clean: keep only monthly rows (YYYYMM format, 6 digits)
ff3_csv <- ff3_raw %>%
  rename(date = 1, Mkt_RF = 2, SMB = 3, HML = 4, RF = 5) %>%
  filter(grepl("^\\s*\\d{6}\\s*$", date)) %>%   # monthly rows only
  mutate(
    date   = as.yearmon(trimws(date), format = "%Y%m"),
    Mkt_RF = as.numeric(Mkt_RF) / 100,
    SMB    = as.numeric(SMB)    / 100,
    HML    = as.numeric(HML)    / 100,
    RF     = as.numeric(RF)     / 100
  ) %>%
  filter(!is.na(date), date >= as.yearmon("Jan 2010"))

unlink(temp)

head(ff3_csv)
##       date  Mkt_RF     SMB     HML    RF
## 1 Jan 2010 -0.0335  0.0043  0.0033 0e+00
## 2 Feb 2010  0.0339  0.0118  0.0318 0e+00
## 3 Mar 2010  0.0630  0.0146  0.0219 1e-04
## 4 Apr 2010  0.0199  0.0484  0.0296 1e-04
## 5 May 2010 -0.0790  0.0013 -0.0248 1e-04
## 6 Jun 2010 -0.0556 -0.0179 -0.0473 1e-04
tail(ff3_csv)
##         date  Mkt_RF     SMB     HML     RF
## 191 Nov 2025 -0.0013  0.0054  0.0357 0.0030
## 192 Dec 2025 -0.0036 -0.0103  0.0236 0.0034
## 193 Jan 2026  0.0103  0.0212  0.0386 0.0030
## 194 Feb 2026 -0.0117  0.0024  0.0265 0.0028
## 195 Mar 2026 -0.0518  0.0044  0.0335 0.0029
## 196 Apr 2026  0.0994  0.0013 -0.0127 0.0029

Q5: Merge Monthly Returns with FF3 Data

merged_tbl <- monthly_returns_tbl %>%
  inner_join(ff3_csv, by = "date") %>%
  arrange(date)

head(merged_tbl)
## # A tibble: 6 × 13
##   date           SPY     QQQ      EEM     IWM     EFA      TLT     IYR      GLD
##   <yearmon>    <dbl>   <dbl>    <dbl>   <dbl>   <dbl>    <dbl>   <dbl>    <dbl>
## 1 Jan 2010  -0.0524  -0.0782 -0.104   -0.0605 -0.0749  0.0278  -0.0520 -0.0350 
## 2 Feb 2010   0.0154   0.0347 -0.00890  0.0326 -0.0153  0.00570  0.0357  0.00997
## 3 Mar 2010   0.0500   0.0617  0.0631   0.0577  0.0556 -0.0201   0.0863 -0.00439
## 4 Apr 2010   0.00857  0.0224 -0.0271   0.0471 -0.0449  0.0358   0.0590  0.0463 
## 5 May 2010  -0.0912  -0.0867 -0.0989  -0.0957 -0.118   0.0525  -0.0852  0.0272 
## 6 Jun 2010  -0.0355  -0.0510  0.00447 -0.0486 -0.0108  0.0506  -0.0278  0.0148 
## # ℹ 4 more variables: Mkt_RF <dbl>, SMB <dbl>, HML <dbl>, RF <dbl>
dim(merged_tbl)
## [1] 196  13

Q6: GMV Portfolio via CAPM Covariance Matrix

# --- Helper: Global Minimum Variance weights ---
gmv_weights <- function(cov_matrix) {
  n  <- ncol(cov_matrix)
  Dmat <- 2 * cov_matrix
  dvec <- rep(0, n)
  # Constraints: weights sum to 1 (Aeq w = beq), weights >= 0
  Amat <- cbind(rep(1, n), diag(n))
  bvec <- c(1, rep(0, n))
  sol  <- solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
  return(sol$solution)
}

# --- Helper: CAPM covariance matrix ---
capm_cov <- function(returns_mat, ff3_mat) {
  # Excess returns: subtract RF
  rf      <- ff3_mat[, "RF"]
  mkt_rf  <- ff3_mat[, "Mkt_RF"]
  exc_ret <- sweep(returns_mat, 1, rf, "-")   # asset excess returns

  n_assets <- ncol(exc_ret)
  betas    <- numeric(n_assets)
  resid_var <- numeric(n_assets)

  for (i in seq_len(n_assets)) {
    fit        <- lm(exc_ret[, i] ~ mkt_rf)
    betas[i]   <- coef(fit)[2]
    resid_var[i] <- var(residuals(fit))
  }

  var_mkt  <- var(mkt_rf)
  # CAPM cov matrix: beta %*% t(beta) * var(Mkt) + diag(residual variances)
  cov_mat  <- (betas %o% betas) * var_mkt + diag(resid_var)
  return(list(cov_mat = cov_mat, betas = betas))
}

# --- Q6 computation ---
window_start <- as.yearmon("Feb 2010")
window_end   <- as.yearmon("Jan 2015")
invest_month <- as.yearmon("Feb 2015")

# Filter training window
train_tbl <- merged_tbl %>%
  filter(date >= window_start, date <= window_end)

train_returns <- train_tbl %>%
  select(all_of(tickers)) %>%
  as.matrix()

train_ff3 <- train_tbl %>%
  select(Mkt_RF, SMB, HML, RF) %>%
  as.matrix()

# CAPM covariance matrix
capm_result  <- capm_cov(train_returns, train_ff3)
capm_cov_mat <- capm_result$cov_mat
colnames(capm_cov_mat) <- rownames(capm_cov_mat) <- tickers

cat("CAPM Covariance Matrix (2010/02 - 2015/01):\n")
## CAPM Covariance Matrix (2010/02 - 2015/01):
round(capm_cov_mat, 6)
##           SPY       QQQ       EEM       IWM       EFA       TLT       IYR
## SPY  0.001483  0.001370  0.001560  0.001722  0.001442 -0.000930  0.001176
## QQQ  0.001370  0.001784  0.001634  0.001803  0.001510 -0.000974  0.001232
## EEM  0.001560  0.001634  0.003297  0.002053  0.001719 -0.001109  0.001402
## IWM  0.001722  0.001803  0.002053  0.003000  0.001897 -0.001224  0.001548
## EFA  0.001442  0.001510  0.001719  0.001897  0.002317 -0.001024  0.001296
## TLT -0.000930 -0.000974 -0.001109 -0.001224 -0.001024  0.001493 -0.000836
## IYR  0.001176  0.001232  0.001402  0.001548  0.001296 -0.000836  0.002188
## GLD  0.000201  0.000210  0.000239  0.000264  0.000221 -0.000143  0.000180
##           GLD
## SPY  0.000201
## QQQ  0.000210
## EEM  0.000239
## IWM  0.000264
## EFA  0.000221
## TLT -0.000143
## IYR  0.000180
## GLD  0.002740
# GMV weights
w_capm <- gmv_weights(capm_cov_mat)
names(w_capm) <- tickers

cat("\nGMV Weights (CAPM, Jan 2015):\n")
## 
## GMV Weights (CAPM, Jan 2015):
round(w_capm, 4)
##    SPY    QQQ    EEM    IWM    EFA    TLT    IYR    GLD 
## 0.2604 0.1035 0.0052 0.0000 0.0348 0.4598 0.0578 0.0784
# Realized portfolio return in Feb 2015
feb2015 <- merged_tbl %>% filter(date == invest_month)
ret_feb2015 <- feb2015 %>% select(all_of(tickers)) %>% as.numeric()
port_ret_capm_q6 <- sum(w_capm * ret_feb2015)

cat("\nRealized Portfolio Return in Feb 2015 (CAPM GMV):", 
    round(port_ret_capm_q6 * 100, 4), "%\n")
## 
## Realized Portfolio Return in Feb 2015 (CAPM GMV): -1.2232 %

Q7: GMV Portfolio via FF3 Covariance Matrix

# --- Helper: FF3 covariance matrix ---
ff3_cov <- function(returns_mat, ff3_mat) {
  rf      <- ff3_mat[, "RF"]
  factors <- ff3_mat[, c("Mkt_RF", "SMB", "HML")]
  exc_ret <- sweep(returns_mat, 1, rf, "-")

  n_assets <- ncol(exc_ret)
  B        <- matrix(0, n_assets, 3)    # factor loadings
  resid_var <- numeric(n_assets)

  for (i in seq_len(n_assets)) {
    fit         <- lm(exc_ret[, i] ~ factors)
    B[i, ]      <- coef(fit)[-1]
    resid_var[i] <- var(residuals(fit))
  }

  cov_factors <- cov(factors)
  # FF3 cov: B %*% cov(F) %*% t(B) + diag(residual variances)
  cov_mat     <- B %*% cov_factors %*% t(B) + diag(resid_var)
  return(list(cov_mat = cov_mat, B = B))
}

# FF3 covariance matrix
ff3_result  <- ff3_cov(train_returns, train_ff3)
ff3_cov_mat <- ff3_result$cov_mat
colnames(ff3_cov_mat) <- rownames(ff3_cov_mat) <- tickers

cat("FF3 Covariance Matrix (2010/02 - 2015/01):\n")
## FF3 Covariance Matrix (2010/02 - 2015/01):
round(ff3_cov_mat, 6)
##           SPY       QQQ       EEM       IWM       EFA       TLT       IYR
## SPY  0.001483  0.001373  0.001561  0.001695  0.001455 -0.000924  0.001175
## QQQ  0.001373  0.001784  0.001671  0.001799  0.001540 -0.000912  0.001245
## EEM  0.001561  0.001671  0.003297  0.002066  0.001730 -0.001075  0.001411
## IWM  0.001695  0.001799  0.002066  0.003000  0.001745 -0.001271  0.001575
## EFA  0.001455  0.001540  0.001730  0.001745  0.002317 -0.000977  0.001290
## TLT -0.000924 -0.000912 -0.001075 -0.001271 -0.000977  0.001493 -0.000827
## IYR  0.001175  0.001245  0.001411  0.001575  0.001290 -0.000827  0.002188
## GLD  0.000197  0.000350  0.000324  0.000381  0.000233 -0.000025  0.000218
##           GLD
## SPY  0.000197
## QQQ  0.000350
## EEM  0.000324
## IWM  0.000381
## EFA  0.000233
## TLT -0.000025
## IYR  0.000218
## GLD  0.002740
# GMV weights
w_ff3 <- gmv_weights(ff3_cov_mat)
names(w_ff3) <- tickers

cat("\nGMV Weights (FF3, Jan 2015):\n")
## 
## GMV Weights (FF3, Jan 2015):
round(w_ff3, 4)
##    SPY    QQQ    EEM    IWM    EFA    TLT    IYR    GLD 
## 0.3275 0.0282 0.0000 0.0292 0.0214 0.4689 0.0641 0.0608
# Realized portfolio return in Feb 2015
port_ret_ff3_q7 <- sum(w_ff3 * ret_feb2015)

cat("\nRealized Portfolio Return in Feb 2015 (FF3 GMV):",
    round(port_ret_ff3_q7 * 100, 4), "%\n")
## 
## Realized Portfolio Return in Feb 2015 (FF3 GMV): -1.3194 %

Q8: Backtest — Rolling GMV Portfolios (CAPM vs FF3)

# Dates: rolling window 2010/02 to 2015/01, invest from 2015/02 to 2026/05
all_dates   <- merged_tbl$date
invest_dates <- all_dates[all_dates >= as.yearmon("Feb 2015") &
                           all_dates <= as.yearmon("May 2026")]

capm_returns <- numeric(length(invest_dates))
ff3_returns  <- numeric(length(invest_dates))

for (i in seq_along(invest_dates)) {
  t_invest <- invest_dates[i]
  t_end    <- all_dates[which(all_dates == t_invest) - 1]
  t_start  <- all_dates[which(all_dates == t_end) - 59]   # 60-month window

  if (is.na(t_start)) next

  # Training window
  win_tbl <- merged_tbl %>%
    filter(date >= t_start, date <= t_end)

  if (nrow(win_tbl) < 60) next

  win_ret <- win_tbl %>% select(all_of(tickers)) %>% as.matrix()
  win_ff3 <- win_tbl %>% select(Mkt_RF, SMB, HML, RF) %>% as.matrix()

  # CAPM GMV
  tryCatch({
    cm      <- capm_cov(win_ret, win_ff3)$cov_mat
    w_c     <- gmv_weights(cm)
    r_t     <- merged_tbl %>% filter(date == t_invest) %>%
                 select(all_of(tickers)) %>% as.numeric()
    capm_returns[i] <- sum(w_c * r_t)
  }, error = function(e) { capm_returns[i] <<- NA })

  # FF3 GMV
  tryCatch({
    fm      <- ff3_cov(win_ret, win_ff3)$cov_mat
    w_f     <- gmv_weights(fm)
    r_t     <- merged_tbl %>% filter(date == t_invest) %>%
                 select(all_of(tickers)) %>% as.numeric()
    ff3_returns[i] <- sum(w_f * r_t)
  }, error = function(e) { ff3_returns[i] <<- NA })
}

# Compile results
backtest_tbl <- tibble(
  date         = invest_dates,
  CAPM_return  = capm_returns,
  FF3_return   = ff3_returns
) %>%
  filter(!is.na(CAPM_return), !is.na(FF3_return)) %>%
  mutate(
    CAPM_cumret = cumprod(1 + CAPM_return) - 1,
    FF3_cumret  = cumprod(1 + FF3_return)  - 1
  )

# Plot cumulative returns
backtest_tbl %>%
  select(date, CAPM_cumret, FF3_cumret) %>%
  pivot_longer(-date, names_to = "Model", values_to = "CumReturn") %>%
  mutate(date = as.Date(date)) %>%
  ggplot(aes(x = date, y = CumReturn, color = Model)) +
  geom_line(linewidth = 1.1) +
  scale_y_continuous(labels = scales::percent) +
  scale_color_manual(values = c("CAPM_cumret" = "#2196F3",
                                "FF3_cumret"  = "#E91E63"),
                     labels = c("CAPM GMV", "FF3 GMV")) +
  labs(
    title    = "Cumulative Returns: GMV Portfolios (2015/02 – 2026/05)",
    subtitle = "CAPM vs. Fama-French 3-Factor Covariance Models",
    x        = "Date",
    y        = "Cumulative Return",
    color    = "Model"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "bottom")

# Summary statistics
cat("\n--- Performance Summary ---\n")
## 
## --- Performance Summary ---
cat("CAPM GMV  | Ann. Return:", 
    round(mean(capm_returns, na.rm=TRUE)*12*100, 2), "%",
    "| Ann. Vol:", 
    round(sd(capm_returns, na.rm=TRUE)*sqrt(12)*100, 2), "%\n")
## CAPM GMV  | Ann. Return: 5.87 % | Ann. Vol: 10.06 %
cat("FF3 GMV   | Ann. Return:", 
    round(mean(ff3_returns, na.rm=TRUE)*12*100, 2), "%",
    "| Ann. Vol:", 
    round(sd(ff3_returns, na.rm=TRUE)*sqrt(12)*100, 2), "%\n")
## FF3 GMV   | Ann. Return: 5.68 % | Ann. Vol: 10.15 %