Setup: Load Libraries
library(tidyquant)
library(tidyverse)
library(lubridate)
library(timetk)
library(purrr)
library(quadprog)
library(PerformanceAnalytics)
library(frenchdata) # for Fama-French data; install via: install.packages("frenchdata")
Q1: Import ETF Data from Yahoo Finance
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
# Download daily adjusted closing prices from 2010-01-01 to today
prices_raw <- tq_get(tickers,
from = "2010-01-01",
to = Sys.Date(),
get = "stock.prices")
# Extract adjusted prices and pivot to wide format
prices <- prices_raw %>%
select(symbol, date, adjusted) %>%
pivot_wider(names_from = symbol, values_from = adjusted) %>%
arrange(date)
# Preview first and last rows
head(prices)
## # A tibble: 6 × 9
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-04 84.8 40.3 30.4 51.4 35.1 55.7 26.8 110.
## 2 2010-01-05 85.0 40.3 30.6 51.2 35.2 56.1 26.8 110.
## 3 2010-01-06 85.1 40.0 30.6 51.1 35.3 55.3 26.8 112.
## 4 2010-01-07 85.4 40.1 30.5 51.5 35.2 55.4 27.1 111.
## 5 2010-01-08 85.7 40.4 30.7 51.8 35.5 55.4 26.9 111.
## 6 2010-01-11 85.8 40.2 30.6 51.6 35.7 55.1 27.0 113.
tail(prices)
## # A tibble: 6 × 9
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2026-06-02 760. 746. 70.8 292. 105. 85.7 100.0 412.
## 2 2026-06-03 754. 744. 69.9 288. 104. 85.3 100 408.
## 3 2026-06-04 757. 741. 69.1 292. 105. 85.5 102. 411.
## 4 2026-06-05 738. 705. 64.6 282. 102. 85.1 103. 396.
## 5 2026-06-08 739. 716. 65.8 284. 103. 84.6 101. 397.
## 6 2026-06-09 737. 708. 65.8 285. 103. 85.1 103. 391.
Q2: Calculate Weekly and Monthly Returns (Simple Returns)
# Convert to xts for timetk/PerformanceAnalytics compatibility
prices_xts <- prices %>%
column_to_rownames("date") %>%
as.xts()
# Weekly simple returns — use lapply so each column stays as xts, then merge
weekly_returns <- do.call(merge, lapply(tickers, function(tk) {
xts::apply.weekly(prices_xts[, tk],
function(w) (as.numeric(last(w)) - as.numeric(first(w))) / as.numeric(first(w)))
}))
colnames(weekly_returns) <- tickers
# Monthly simple returns
monthly_returns <- do.call(merge, lapply(tickers, function(tk) {
xts::apply.monthly(prices_xts[, tk],
function(m) (as.numeric(last(m)) - as.numeric(first(m))) / as.numeric(first(m)))
}))
colnames(monthly_returns) <- tickers
cat("Weekly returns dimensions:", dim(weekly_returns), "\n")
## Weekly returns dimensions: 859 8
cat("Monthly returns dimensions:", dim(monthly_returns), "\n")
## Monthly returns dimensions: 198 8
head(monthly_returns)
## SPY QQQ EEM IWM EFA
## 2010-01-29 -0.052413475 -0.07819947 -0.103722915 -0.06048740 -0.07491646
## 2010-02-26 0.015403993 0.03467422 -0.008903497 0.03255474 -0.01534424
## 2010-03-31 0.049975809 0.06169172 0.063099645 0.05771641 0.05562872
## 2010-04-30 0.008573767 0.02242518 -0.027070882 0.04705528 -0.04493597
## 2010-05-28 -0.091233569 -0.08672135 -0.098864526 -0.09568648 -0.11824815
## 2010-06-30 -0.035515192 -0.05101556 0.004468326 -0.04856783 -0.01079268
## TLT IYR GLD
## 2010-01-29 0.027835667 -0.05195359 -0.034972713
## 2010-02-26 0.005704043 0.03573010 0.009967714
## 2010-03-31 -0.020144808 0.08633669 -0.004386396
## 2010-04-30 0.035750993 0.05898803 0.046254293
## 2010-05-28 0.052460029 -0.08516463 0.027218472
## 2010-06-30 0.050593306 -0.02782190 0.014761042
Q4: Download Fama-French 3-Factor Data
# Download FF3 monthly factors using frenchdata package
ff3_raw <- download_french_data("Fama/French 3 Factors")
ff3_monthly <- ff3_raw$subsets$data[[1]] %>%
rename(date = date,
Mkt_RF = `Mkt-RF`,
SMB = SMB,
HML = HML,
RF = RF) %>%
mutate(
date = as.yearmon(as.character(date), "%Y%m"),
# Convert from percentage to decimals
Mkt_RF = Mkt_RF / 100,
SMB = SMB / 100,
HML = HML / 100,
RF = RF / 100
) %>%
filter(date >= as.yearmon("2010-01") & date <= as.yearmon(Sys.Date()))
head(ff3_monthly)
## # A tibble: 6 × 5
## date Mkt_RF SMB HML RF
## <yearmon> <dbl> <dbl> <dbl> <dbl>
## 1 Jan 2010 -0.0335 0.0043 0.0033 0
## 2 Feb 2010 0.0339 0.0118 0.0318 0
## 3 Mar 2010 0.063 0.0146 0.0219 0.0001
## 4 Apr 2010 0.0199 0.0484 0.0296 0.0001
## 5 May 2010 -0.079 0.0013 -0.0248 0.0001
## 6 Jun 2010 -0.0556 -0.0179 -0.0473 0.0001
Q5: Merge Monthly Returns with FF3 Factors
merged_tbl <- monthly_tbl %>%
inner_join(ff3_monthly, by = "date") %>%
arrange(date)
cat("Merged data dimensions:", dim(merged_tbl), "\n")
## Merged data dimensions: 196 13
head(merged_tbl)
## # A tibble: 6 × 13
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <yearmon> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Jan 2010 -0.0524 -0.0782 -0.104 -0.0605 -0.0749 0.0278 -0.0520 -0.0350
## 2 Feb 2010 0.0154 0.0347 -0.00890 0.0326 -0.0153 0.00570 0.0357 0.00997
## 3 Mar 2010 0.0500 0.0617 0.0631 0.0577 0.0556 -0.0201 0.0863 -0.00439
## 4 Apr 2010 0.00857 0.0224 -0.0271 0.0471 -0.0449 0.0358 0.0590 0.0463
## 5 May 2010 -0.0912 -0.0867 -0.0989 -0.0957 -0.118 0.0525 -0.0852 0.0272
## 6 Jun 2010 -0.0355 -0.0510 0.00447 -0.0486 -0.0108 0.0506 -0.0278 0.0148
## # ℹ 4 more variables: Mkt_RF <dbl>, SMB <dbl>, HML <dbl>, RF <dbl>
Q6: CAPM-Based GMV Portfolio (Single Period: 2010/02 – 2015/01)
# ── Helper: Global Minimum Variance weights ────────────────────────────────
gmv_weights <- function(cov_mat) {
n <- nrow(cov_mat)
Dmat <- 2 * cov_mat
dvec <- rep(0, n)
# Constraints: weights sum to 1 (Aeq * w = beq), no short-selling optional
Amat <- cbind(rep(1, n)) # equality: sum(w) = 1
bvec <- 1
result <- quadprog::solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
w <- result$solution
names(w) <- rownames(cov_mat)
return(w)
}
# ── CAPM covariance estimator ──────────────────────────────────────────────
capm_cov <- function(returns_df, ff_df) {
# returns_df: T x 8 asset returns; ff_df: T x (Mkt_RF, RF) aligned rows
n_assets <- ncol(returns_df)
T_obs <- nrow(returns_df)
excess_ret <- sweep(returns_df, 1, ff_df$RF, "-") # excess asset returns
mkt_rf <- ff_df$Mkt_RF
betas <- numeric(n_assets)
resid_var <- numeric(n_assets)
for (i in seq_len(n_assets)) {
fit <- lm(excess_ret[[i]] ~ mkt_rf)
betas[i] <- coef(fit)[2]
resid_var[i] <- var(residuals(fit))
}
sigma2_m <- var(mkt_rf)
# CAPM cov: Sigma = beta %*% t(beta) * sigma2_m + diag(resid_var)
cov_mat <- outer(betas, betas) * sigma2_m + diag(resid_var)
rownames(cov_mat) <- colnames(cov_mat) <- colnames(returns_df)
return(cov_mat)
}
# ── Filter training window: 2010/02 – 2015/01 (60 months) ─────────────────
train <- merged_tbl %>%
filter(date >= as.yearmon("2010-02") & date <= as.yearmon("2015-01"))
ret_train <- train %>% select(all_of(tickers))
ff_train <- train %>% select(Mkt_RF, RF)
# Compute CAPM covariance matrix
cov_capm <- capm_cov(ret_train, ff_train)
cat("CAPM Covariance Matrix (2010/02-2015/01):\n")
## CAPM Covariance Matrix (2010/02-2015/01):
round(cov_capm, 6)
## SPY QQQ EEM IWM EFA TLT IYR
## SPY 0.001483 0.001370 0.001560 0.001722 0.001442 -0.000930 0.001176
## QQQ 0.001370 0.001784 0.001634 0.001803 0.001510 -0.000974 0.001232
## EEM 0.001560 0.001634 0.003297 0.002053 0.001719 -0.001109 0.001402
## IWM 0.001722 0.001803 0.002053 0.003000 0.001897 -0.001224 0.001548
## EFA 0.001442 0.001510 0.001719 0.001897 0.002317 -0.001024 0.001296
## TLT -0.000930 -0.000974 -0.001109 -0.001224 -0.001024 0.001493 -0.000836
## IYR 0.001176 0.001232 0.001402 0.001548 0.001296 -0.000836 0.002188
## GLD 0.000201 0.000210 0.000239 0.000264 0.000221 -0.000143 0.000180
## GLD
## SPY 0.000201
## QQQ 0.000210
## EEM 0.000239
## IWM 0.000264
## EFA 0.000221
## TLT -0.000143
## IYR 0.000180
## GLD 0.002740
# Compute GMV weights
w_capm <- gmv_weights(cov_capm)
cat("\nCAPM GMV Weights (for allocation on 2015/02):\n")
##
## CAPM GMV Weights (for allocation on 2015/02):
round(w_capm, 4)
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.2709 0.1090 0.0068 -0.0195 0.0376 0.4576 0.0592 0.0784
# ── Realized return in 2015/02 ─────────────────────────────────────────────
ret_201502 <- merged_tbl %>%
filter(date == as.yearmon("2015-02")) %>%
select(all_of(tickers)) %>%
as.numeric()
realized_capm <- sum(w_capm * ret_201502)
cat(sprintf("\nRealized Portfolio Return (CAPM GMV) on 2015/02: %.4f (%.2f%%)\n",
realized_capm, realized_capm * 100))
##
## Realized Portfolio Return (CAPM GMV) on 2015/02: -0.0121 (-1.21%)
Q7: FF3-Based GMV Portfolio (Single Period: 2010/02 – 2015/01)
# ── FF3 covariance estimator ───────────────────────────────────────────────
ff3_cov <- function(returns_df, ff_df) {
n_assets <- ncol(returns_df)
excess_ret <- sweep(returns_df, 1, ff_df$RF, "-")
factors <- ff_df %>% select(Mkt_RF, SMB, HML)
B <- matrix(0, nrow = n_assets, ncol = 3) # factor loadings
resid_var <- numeric(n_assets)
for (i in seq_len(n_assets)) {
fit <- lm(excess_ret[[i]] ~ Mkt_RF + SMB + HML, data = factors)
B[i, ] <- coef(fit)[2:4]
resid_var[i] <- var(residuals(fit))
}
# Factor covariance matrix (3x3)
F_cov <- cov(factors)
# FF3 cov: Sigma = B %*% F_cov %*% t(B) + diag(resid_var)
cov_mat <- B %*% F_cov %*% t(B) + diag(resid_var)
rownames(cov_mat) <- colnames(cov_mat) <- colnames(returns_df)
return(cov_mat)
}
# Compute FF3 covariance matrix
ff_train_full <- train %>% select(Mkt_RF, SMB, HML, RF)
cov_ff3 <- ff3_cov(ret_train, ff_train_full)
cat("FF3 Covariance Matrix (2010/02-2015/01):\n")
## FF3 Covariance Matrix (2010/02-2015/01):
round(cov_ff3, 6)
## SPY QQQ EEM IWM EFA TLT IYR
## SPY 0.001483 0.001373 0.001561 0.001695 0.001455 -0.000924 0.001175
## QQQ 0.001373 0.001784 0.001671 0.001799 0.001540 -0.000912 0.001245
## EEM 0.001561 0.001671 0.003297 0.002066 0.001730 -0.001075 0.001411
## IWM 0.001695 0.001799 0.002066 0.003000 0.001745 -0.001271 0.001575
## EFA 0.001455 0.001540 0.001730 0.001745 0.002317 -0.000977 0.001290
## TLT -0.000924 -0.000912 -0.001075 -0.001271 -0.000977 0.001493 -0.000827
## IYR 0.001175 0.001245 0.001411 0.001575 0.001290 -0.000827 0.002188
## GLD 0.000197 0.000350 0.000324 0.000381 0.000233 -0.000025 0.000218
## GLD
## SPY 0.000197
## QQQ 0.000350
## EEM 0.000324
## IWM 0.000381
## EFA 0.000233
## TLT -0.000025
## IYR 0.000218
## GLD 0.002740
# Compute GMV weights
w_ff3 <- gmv_weights(cov_ff3)
cat("\nFF3 GMV Weights (for allocation on 2015/02):\n")
##
## FF3 GMV Weights (for allocation on 2015/02):
round(w_ff3, 4)
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.3281 0.0286 -0.0015 0.0295 0.0216 0.4688 0.0641 0.0608
# Realized return in 2015/02
realized_ff3 <- sum(w_ff3 * ret_201502)
cat(sprintf("\nRealized Portfolio Return (FF3 GMV) on 2015/02: %.4f (%.2f%%)\n",
realized_ff3, realized_ff3 * 100))
##
## Realized Portfolio Return (FF3 GMV) on 2015/02: -0.0132 (-1.32%)
Q8: Backtest – Rolling GMV Portfolios (CAPM vs FF3), 2015/02 –
2026/05
# ── Rolling window backtest ────────────────────────────────────────────────
# For each month t from 2015/02 onward:
# 1. Use returns from (t-60) to (t-1) to estimate covariance
# 2. Compute GMV weights
# 3. Record the realized return at month t
all_dates <- merged_tbl$date
start_bt <- as.yearmon("2015-02")
end_bt <- as.yearmon("2026-05")
bt_dates <- all_dates[all_dates >= start_bt & all_dates <= end_bt]
results <- tibble(
date = bt_dates,
ret_capm_gmv = NA_real_,
ret_ff3_gmv = NA_real_
)
for (i in seq_along(bt_dates)) {
t_month <- bt_dates[i]
# Training window: 60 months before t_month
train_end <- all_dates[all_dates < t_month]
if (length(train_end) < 60) next
train_end <- tail(train_end, 1)
train_start <- all_dates[which(all_dates == train_end) - 59]
win <- merged_tbl %>%
filter(date >= train_start & date <= train_end)
if (nrow(win) < 60) next
ret_w <- win %>% select(all_of(tickers))
ff_w <- win %>% select(Mkt_RF, SMB, HML, RF)
# CAPM GMV
tryCatch({
cov_c <- capm_cov(ret_w, ff_w)
w_c <- gmv_weights(cov_c)
ret_t <- merged_tbl %>% filter(date == t_month) %>%
select(all_of(tickers)) %>% as.numeric()
results$ret_capm_gmv[i] <- sum(w_c * ret_t)
}, error = function(e) NULL)
# FF3 GMV
tryCatch({
cov_f <- ff3_cov(ret_w, ff_w)
w_f <- gmv_weights(cov_f)
ret_t <- merged_tbl %>% filter(date == t_month) %>%
select(all_of(tickers)) %>% as.numeric()
results$ret_ff3_gmv[i] <- sum(w_f * ret_t)
}, error = function(e) NULL)
}
# ── Cumulative returns ─────────────────────────────────────────────────────
cum_results <- results %>%
drop_na() %>%
mutate(
cum_capm = cumprod(1 + ret_capm_gmv) - 1,
cum_ff3 = cumprod(1 + ret_ff3_gmv) - 1
)
# ── Summary statistics ─────────────────────────────────────────────────────
cat("=== Performance Summary (2015/02 – 2026/05) ===\n\n")
## === Performance Summary (2015/02 – 2026/05) ===
perf_summary <- cum_results %>%
summarise(
CAPM_Ann_Return = prod(1 + ret_capm_gmv)^(12/n()) - 1,
FF3_Ann_Return = prod(1 + ret_ff3_gmv)^(12/n()) - 1,
CAPM_Ann_Vol = sd(ret_capm_gmv) * sqrt(12),
FF3_Ann_Vol = sd(ret_ff3_gmv) * sqrt(12),
CAPM_Sharpe = CAPM_Ann_Return / CAPM_Ann_Vol,
FF3_Sharpe = FF3_Ann_Return / FF3_Ann_Vol,
CAPM_Cum_Return = last(cum_capm),
FF3_Cum_Return = last(cum_ff3)
)
print(t(round(perf_summary, 4)))
## [,1]
## CAPM_Ann_Return 0.0522
## FF3_Ann_Return 0.0322
## CAPM_Ann_Vol 0.1013
## FF3_Ann_Vol 0.1020
## CAPM_Sharpe 0.5156
## FF3_Sharpe 0.3159
## CAPM_Cum_Return 0.7730
## FF3_Cum_Return 0.4288
# ── Plot cumulative returns ────────────────────────────────────────────────
cum_results %>%
select(date, cum_capm, cum_ff3) %>%
pivot_longer(cols = c(cum_capm, cum_ff3),
names_to = "Model",
values_to = "Cumulative_Return") %>%
mutate(
Model = recode(Model,
cum_capm = "CAPM GMV",
cum_ff3 = "FF3 GMV"),
date = as.Date(date)
) %>%
ggplot(aes(x = date, y = Cumulative_Return, color = Model)) +
geom_line(linewidth = 1) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
scale_color_manual(values = c("CAPM GMV" = "#2C7BB6", "FF3 GMV" = "#D7191C")) +
labs(
title = "Cumulative Returns: GMV Portfolios (CAPM vs FF3)",
subtitle = "Rolling 60-month window | 2015/02 – 2026/05",
x = "Date",
y = "Cumulative Return",
color = "Model"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")

Session Info
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: Asia/Irkutsk
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] frenchdata_0.2.0 quadprog_1.5-8
## [3] timetk_2.9.1 lubridate_1.9.4
## [5] forcats_1.0.1 stringr_1.5.2
## [7] dplyr_1.1.4 purrr_1.1.0
## [9] readr_2.1.5 tidyr_1.3.1
## [11] tibble_3.3.0 ggplot2_4.0.0
## [13] tidyverse_2.0.0 PerformanceAnalytics_2.1.0
## [15] quantmod_0.4.28 TTR_0.24.4
## [17] xts_0.14.1 zoo_1.8-14
## [19] tidyquant_1.0.12
##
## loaded via a namespace (and not attached):
## [1] rlang_1.1.6 magrittr_2.0.3 furrr_0.3.1
## [4] compiler_4.5.1 vctrs_0.6.5 lhs_1.2.1
## [7] rvest_1.0.5 tune_2.0.1 pkgconfig_2.0.3
## [10] crayon_1.5.3 fastmap_1.2.0 labeling_0.4.3
## [13] rmarkdown_2.29 prodlim_2025.04.28 tzdb_0.5.0
## [16] bit_4.6.0 xfun_0.52 cachem_1.1.0
## [19] jsonlite_2.0.0 recipes_1.3.1 parallel_4.5.1
## [22] R6_2.6.1 bslib_0.9.0 stringi_1.8.7
## [25] rsample_1.3.1 RColorBrewer_1.1-3 parallelly_1.45.1
## [28] rpart_4.1.24 jquerylib_0.1.4 Rcpp_1.1.0
## [31] assertthat_0.2.1 dials_1.4.2 knitr_1.50
## [34] future.apply_1.20.0 Matrix_1.7-3 splines_4.5.1
## [37] nnet_7.3-20 timechange_0.3.0 tidyselect_1.2.1
## [40] rstudioapi_0.17.1 yaml_2.3.10 timeDate_4051.111
## [43] codetools_0.2-20 curl_7.0.0 listenv_0.9.1
## [46] lattice_0.22-7 withr_3.0.2 S7_0.2.0
## [49] evaluate_1.0.5 future_1.67.0 survival_3.8-3
## [52] xml2_1.4.0 pillar_1.11.1 generics_0.1.4
## [55] vroom_1.6.6 hms_1.1.3 scales_1.4.0
## [58] globals_0.18.0 class_7.3-23 glue_1.8.0
## [61] tools_4.5.1 data.table_1.17.8 gower_1.0.2
## [64] fs_1.6.6 grid_4.5.1 yardstick_1.3.2
## [67] RobStatTM_1.0.11 ipred_0.9-15 cli_3.6.5
## [70] DiceDesign_1.10 workflows_1.3.0 parsnip_1.4.1
## [73] lava_1.8.2 gtable_0.3.6 GPfit_1.0-9
## [76] selectr_0.4-2 sass_0.4.10 digest_0.6.37
## [79] farver_2.1.2 htmltools_0.5.8.1 lifecycle_1.0.4
## [82] hardhat_1.4.2 httr_1.4.7 bit64_4.6.0-1
## [85] MASS_7.3-65