Q1: Import ETF Data
library(tidyquant)
library(lubridate)
library(timetk)
library(purrr)
library(tibble)
library(dplyr)
library(quadprog)
library(ggplot2)
library(tidyr)
library(xts)
tickers <- c("SPY","QQQ","EEM","IWM","EFA","TLT","IYR","GLD")
raw <- tq_get(tickers,
from = "2010-01-01",
to = Sys.Date(),
get = "stock.prices")
# Wide tibble of adjusted prices
prices_tbl <- raw %>%
select(symbol, date, adjusted) %>%
pivot_wider(names_from = symbol, values_from = adjusted) %>%
arrange(date)
# Build xts explicitly — extract numeric matrix + Date index
prices_xts <- xts(
x = as.matrix(prices_tbl[ , tickers]), # numeric columns only
order.by = as.Date(prices_tbl$date)
)
storage.mode(prices_xts) <- "double" # ensure double, not list
head(prices_xts)
## SPY QQQ EEM IWM EFA TLT IYR
## 2010-01-04 84.79640 40.29080 30.35151 51.36655 35.12843 55.70950 26.76811
## 2010-01-05 85.02082 40.29080 30.57180 51.18993 35.15941 56.06935 26.83239
## 2010-01-06 85.08069 40.04776 30.63576 51.14177 35.30801 55.31871 26.82070
## 2010-01-07 85.43985 40.07379 30.45811 51.51912 35.17179 55.41175 27.06028
## 2010-01-08 85.72417 40.40363 30.69972 51.80010 35.45043 55.38699 26.87913
## 2010-01-11 85.84387 40.23872 30.63576 51.59135 35.74147 55.08301 27.00767
## GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
tail(prices_xts)
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 2026-06-02 759.57 746.16 70.80 291.66 105.02 85.65 99.99 411.95
## 2026-06-03 754.24 744.21 69.92 287.67 104.12 85.31 100.00 407.87
## 2026-06-04 757.09 740.61 69.10 292.01 104.95 85.50 101.79 411.27
## 2026-06-05 737.55 705.06 64.59 281.65 102.26 85.06 102.54 396.24
## 2026-06-08 739.22 716.07 65.75 284.11 102.88 84.62 101.08 397.27
## 2026-06-09 737.05 707.83 65.82 285.02 102.90 85.12 103.49 390.78
Q2: Weekly and Monthly Simple Returns
# Simple return function: last price / first price - 1
simple_ret <- function(x) {
apply(x, 2, function(col) col[length(col)] / col[1] - 1)
}
weekly_ret <- apply.weekly(prices_xts, simple_ret)
monthly_ret <- apply.monthly(prices_xts, simple_ret)
head(weekly_ret)
## SPY QQQ EEM IWM EFA
## 2010-01-08 0.01094109 0.002800229 0.01147255 0.008440203 0.009166324
## 2010-01-15 -0.00950023 -0.011000880 -0.02690761 -0.009025638 -0.011607773
## 2010-01-22 -0.05084316 -0.052157269 -0.07453269 -0.048110651 -0.064919557
## 2010-01-29 -0.02168152 -0.034303656 -0.04060194 -0.027346137 -0.037594241
## 2010-02-05 -0.02200620 -0.006472222 -0.05367579 -0.025484808 -0.036676827
## 2010-02-12 0.02030434 0.025545225 0.04371427 0.039877305 0.025564866
## TLT IYR GLD
## 2010-01-08 -0.005789203 0.004147369 0.01429872
## 2010-01-15 0.025676468 -0.011033914 -0.01763401
## 2010-01-22 0.012991873 -0.060489016 -0.03900644
## 2010-01-29 0.007970863 -0.015639456 -0.01414221
## 2010-02-05 0.009106177 -0.014699176 -0.03387170
## 2010-02-12 -0.020737426 0.015500282 0.02883506
head(monthly_ret)
## SPY QQQ EEM IWM EFA
## 2010-01-29 -0.052413555 -0.07819917 -0.103722796 -0.06048732 -0.07491626
## 2010-02-26 0.015404464 0.03467413 -0.008903633 0.03255515 -0.01534469
## 2010-03-31 0.049975723 0.06169173 0.063099437 0.05771658 0.05562872
## 2010-04-30 0.008574028 0.02242545 -0.027071305 0.04705514 -0.04493576
## 2010-05-28 -0.091233875 -0.08672176 -0.098864825 -0.09568721 -0.11824828
## 2010-06-30 -0.035514642 -0.05101632 0.004468037 -0.04856824 -0.01079274
## TLT IYR GLD
## 2010-01-29 0.027836790 -0.05195409 -0.034972713
## 2010-02-26 0.005704983 0.03573005 0.009967714
## 2010-03-31 -0.020145070 0.08633625 -0.004386396
## 2010-04-30 0.035749667 0.05898811 0.046254293
## 2010-05-28 0.052459973 -0.08516431 0.027218472
## 2010-06-30 0.050595019 -0.02782228 0.014761042
Q3: Monthly Returns as Tibble
monthly_tbl <- tk_tbl(monthly_ret, rename_index = "date") %>%
mutate(date = floor_date(as.Date(date), "month"))
head(monthly_tbl)
## # A tibble: 6 × 9
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-01 -0.0524 -0.0782 -0.104 -0.0605 -0.0749 0.0278 -0.0520 -0.0350
## 2 2010-02-01 0.0154 0.0347 -0.00890 0.0326 -0.0153 0.00570 0.0357 0.00997
## 3 2010-03-01 0.0500 0.0617 0.0631 0.0577 0.0556 -0.0201 0.0863 -0.00439
## 4 2010-04-01 0.00857 0.0224 -0.0271 0.0471 -0.0449 0.0357 0.0590 0.0463
## 5 2010-05-01 -0.0912 -0.0867 -0.0989 -0.0957 -0.118 0.0525 -0.0852 0.0272
## 6 2010-06-01 -0.0355 -0.0510 0.00447 -0.0486 -0.0108 0.0506 -0.0278 0.0148
Q4: Fama-French 3-Factor Data
# Download FF3 monthly data
ff3_raw <- tq_get("F-F_Research_Data_Factors",
get = "economic.data", # tidyquant has FF via frenchdata pkg
from = "2010-01-01")
# Alternative: use frenchdata package
if (!requireNamespace("frenchdata", quietly = TRUE)) {
install.packages("frenchdata")
}
library(frenchdata)
ff3_data <- download_french_data("Fama/French 3 Factors")
ff3_monthly <- ff3_data$subsets$data[[1]] %>% # Monthly data
mutate(date = as.Date(paste0(date, "01"), "%Y%m%d"),
date = floor_date(date, "month")) %>%
filter(date >= "2010-01-01") %>%
mutate(across(c(`Mkt-RF`, SMB, HML, RF), ~ as.numeric(.) / 100))
head(ff3_monthly)
## # A tibble: 6 × 5
## date `Mkt-RF` SMB HML RF
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-01 -0.0335 0.0043 0.0033 0
## 2 2010-02-01 0.0339 0.0118 0.0318 0
## 3 2010-03-01 0.063 0.0146 0.0219 0.0001
## 4 2010-04-01 0.0199 0.0484 0.0296 0.0001
## 5 2010-05-01 -0.079 0.0013 -0.0248 0.0001
## 6 2010-06-01 -0.0556 -0.0179 -0.0473 0.0001
Q5: Merge Monthly Returns with FF3 Factors
merged_tbl <- monthly_tbl %>%
inner_join(ff3_monthly, by = "date")
head(merged_tbl)
## # A tibble: 6 × 13
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-01 -0.0524 -0.0782 -0.104 -0.0605 -0.0749 0.0278 -0.0520 -0.0350
## 2 2010-02-01 0.0154 0.0347 -0.00890 0.0326 -0.0153 0.00570 0.0357 0.00997
## 3 2010-03-01 0.0500 0.0617 0.0631 0.0577 0.0556 -0.0201 0.0863 -0.00439
## 4 2010-04-01 0.00857 0.0224 -0.0271 0.0471 -0.0449 0.0357 0.0590 0.0463
## 5 2010-05-01 -0.0912 -0.0867 -0.0989 -0.0957 -0.118 0.0525 -0.0852 0.0272
## 6 2010-06-01 -0.0355 -0.0510 0.00447 -0.0486 -0.0108 0.0506 -0.0278 0.0148
## # ℹ 4 more variables: `Mkt-RF` <dbl>, SMB <dbl>, HML <dbl>, RF <dbl>
dim(merged_tbl)
## [1] 196 13
Q6: CAPM-Based GMV Portfolio (Single Period: 2010/02–2015/01)
# Helper: compute GMV weights via quadprog
gmv_weights <- function(cov_mat) {
n <- ncol(cov_mat)
Dmat <- 2 * cov_mat
dvec <- rep(0, n)
# Constraints: sum(w) = 1, w >= 0 (long only)
Amat <- cbind(rep(1, n), diag(n))
bvec <- c(1, rep(0, n))
sol <- solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
sol$solution
}
# Helper: CAPM covariance matrix
# Cov(i,j) = beta_i * beta_j * Var(Rm) + [if i==j: sigma_eps_i^2]
capm_cov <- function(ret_mat, mkt_excess) {
n <- ncol(ret_mat)
betas <- numeric(n)
residvar <- numeric(n)
var_mkt <- var(mkt_excess)
for (i in seq_len(n)) {
fit <- lm(ret_mat[, i] ~ mkt_excess)
betas[i] <- coef(fit)[2]
residvar[i] <- var(resid(fit))
}
# Cov matrix
cov_mat <- betas %o% betas * var_mkt
diag(cov_mat) <- diag(cov_mat) + residvar
colnames(cov_mat) <- rownames(cov_mat) <- colnames(ret_mat)
cov_mat
}
# Training window: 2010/02 – 2015/01 (60 months)
train <- merged_tbl %>%
filter(date >= "2010-02-01", date <= "2015-01-01")
ret_mat_train <- train %>% select(all_of(tickers)) %>% as.matrix()
mkt_ex_train <- train$`Mkt-RF`
cov_capm <- capm_cov(ret_mat_train, mkt_ex_train)
w_capm <- gmv_weights(cov_capm)
names(w_capm) <- tickers
cat("CAPM GMV Weights (2015/01):\n")
## CAPM GMV Weights (2015/01):
print(round(w_capm, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.2607 0.1035 0.0052 0.0000 0.0347 0.4598 0.0578 0.0783
# Realized return 2015/02
ret_201502 <- merged_tbl %>%
filter(date == "2015-02-01") %>%
select(all_of(tickers)) %>%
as.matrix()
realized_capm <- sum(w_capm * ret_201502)
cat(sprintf("\nRealized portfolio return (CAPM GMV) on 2015/02: %.4f (%.2f%%)\n",
realized_capm, realized_capm * 100))
##
## Realized portfolio return (CAPM GMV) on 2015/02: -0.0122 (-1.22%)
Q7: FF3-Based GMV Portfolio (Single Period)
# FF3 covariance matrix
ff3_cov <- function(ret_mat, factors_mat) {
n <- ncol(ret_mat)
k <- ncol(factors_mat) # 3 factors
betas <- matrix(0, n, k)
residvar <- numeric(n)
for (i in seq_len(n)) {
fit <- lm(ret_mat[, i] ~ factors_mat)
betas[i, ] <- coef(fit)[-1]
residvar[i] <- var(resid(fit))
}
F_cov <- cov(factors_mat)
cov_sys <- betas %*% F_cov %*% t(betas)
cov_mat <- cov_sys
diag(cov_mat) <- diag(cov_mat) + residvar
colnames(cov_mat) <- rownames(cov_mat) <- colnames(ret_mat)
cov_mat
}
factors_train <- train %>%
select(`Mkt-RF`, SMB, HML) %>%
as.matrix()
cov_ff3 <- ff3_cov(ret_mat_train, factors_train)
w_ff3 <- gmv_weights(cov_ff3)
names(w_ff3) <- tickers
cat("FF3 GMV Weights (2015/01):\n")
## FF3 GMV Weights (2015/01):
print(round(w_ff3, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.3279 0.0282 0.0000 0.0289 0.0214 0.4688 0.0640 0.0608
realized_ff3 <- sum(w_ff3 * ret_201502)
cat(sprintf("\nRealized portfolio return (FF3 GMV) on 2015/02: %.4f (%.2f%%)\n",
realized_ff3, realized_ff3 * 100))
##
## Realized portfolio return (FF3 GMV) on 2015/02: -0.0132 (-1.32%)
Q8: Rolling Backtest — GMV Portfolios (2015/02 – 2026/05)
# All months in the dataset
all_dates <- merged_tbl$date
# Rolling window: 60 months prior to each rebalance date
backtest_start <- as.Date("2015-02-01")
backtest_end <- as.Date("2026-05-01")
rebalance_dates <- all_dates[all_dates >= backtest_start &
all_dates <= backtest_end]
results <- map_dfr(rebalance_dates, function(t) {
# Training window: 60 months ending at t-1
train_end <- all_dates[all_dates < t]
if (length(train_end) < 60) return(NULL)
train_start <- tail(train_end, 60)[1]
win <- merged_tbl %>%
filter(date >= train_start, date <= tail(train_end, 1))
r_mat <- win %>% select(all_of(tickers)) %>% as.matrix()
f_mat <- win %>% select(`Mkt-RF`, SMB, HML) %>% as.matrix()
mkt_ex <- win$`Mkt-RF`
# Compute covariances and weights
cov_c <- tryCatch(capm_cov(r_mat, mkt_ex), error = function(e) NULL)
cov_f <- tryCatch(ff3_cov(r_mat, f_mat), error = function(e) NULL)
if (is.null(cov_c) || is.null(cov_f)) return(NULL)
w_c <- tryCatch(gmv_weights(cov_c), error = function(e) NULL)
w_f <- tryCatch(gmv_weights(cov_f), error = function(e) NULL)
if (is.null(w_c) || is.null(w_f)) return(NULL)
# Realized return at t
ret_t <- merged_tbl %>%
filter(date == t) %>%
select(all_of(tickers)) %>%
as.matrix()
if (nrow(ret_t) == 0) return(NULL)
tibble(
date = t,
ret_capm = sum(w_c * ret_t),
ret_ff3 = sum(w_f * ret_t)
)
})
# Cumulative returns
results <- results %>%
mutate(
cum_capm = cumprod(1 + ret_capm) - 1,
cum_ff3 = cumprod(1 + ret_ff3) - 1
)
# Plot
results %>%
select(date, cum_capm, cum_ff3) %>%
pivot_longer(-date, names_to = "Model", values_to = "Cumulative_Return") %>%
mutate(Model = recode(Model,
cum_capm = "CAPM GMV",
cum_ff3 = "FF3 GMV")) %>%
ggplot(aes(x = date, y = Cumulative_Return, color = Model)) +
geom_line(linewidth = 1) +
scale_y_continuous(labels = scales::percent) +
labs(title = "Cumulative Returns: GMV Portfolios (2015/02 – 2026/05)",
subtitle = "Rolling 60-month estimation window, rebalanced monthly",
x = "Date", y = "Cumulative Return",
color = "Model") +
theme_minimal(base_size = 13) +
theme(legend.position = "bottom")
