# Install any missing packages automatically
pkgs <- c("quantmod", "PerformanceAnalytics", "xts", "zoo",
"quadprog", "tidyverse", "lubridate")
pkgs_missing <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if (length(pkgs_missing)) install.packages(pkgs_missing)
library(quantmod)
library(PerformanceAnalytics)
library(xts)
library(zoo)
library(quadprog)
library(tidyverse)
library(lubridate)
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
getSymbols(tickers,
src = "yahoo",
from = "2010-01-01",
to = "2025-12-31",
auto.assign = TRUE)
## [1] "SPY" "QQQ" "EEM" "IWM" "EFA" "TLT" "IYR" "GLD"
# Extract adjusted prices and merge into one xts object
adj_prices <- do.call(merge, lapply(tickers, function(tk) Ad(get(tk))))
colnames(adj_prices) <- tickers
head(adj_prices)
## SPY QQQ EEM IWM EFA TLT IYR
## 2010-01-04 84.79638 40.29078 30.35151 51.36657 35.12843 56.13521 26.76810
## 2010-01-05 85.02084 40.29078 30.57181 51.18992 35.15940 56.49770 26.83238
## 2010-01-06 85.08070 40.04774 30.63577 51.14177 35.30801 55.74138 26.82070
## 2010-01-07 85.43985 40.07381 30.45810 51.51911 35.17179 55.83516 27.06029
## 2010-01-08 85.72417 40.40362 30.69972 51.80010 35.45043 55.81015 26.87914
## 2010-01-11 85.84389 40.23873 30.63577 51.59135 35.74147 55.50389 27.00767
## GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
tail(adj_prices)
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 2025-12-22 682.9648 618.4302 54.01 253.1297 95.70 86.39350 93.34799 408.23
## 2025-12-23 686.0863 621.3265 54.31 251.6324 96.29 86.53194 93.27818 413.64
## 2025-12-24 688.4997 623.1443 54.42 252.2613 96.41 87.05609 93.95628 411.93
## 2025-12-26 688.4299 623.1043 54.80 250.9736 96.57 86.76929 94.05600 416.74
## 2025-12-29 685.9766 620.0881 54.66 249.4363 96.28 87.09565 94.23550 398.60
## 2025-12-30 685.1389 618.6499 54.88 247.5896 96.44 86.88796 94.44491 398.89
# Convert daily adjusted prices to end-of-month prices
monthly_prices <- to.monthly(adj_prices, indexAt = "lastof", OHLC = FALSE)
# Discrete (simple) monthly returns: R_t = (P_t / P_{t-1}) - 1
monthly_returns <- ROC(monthly_prices, type = "discrete")
monthly_returns <- na.omit(monthly_returns)
head(monthly_returns)
## SPY QQQ EEM IWM EFA
## 2010-02-28 0.03119537 0.04603911 0.017763489 0.04475136 0.002667386
## 2010-03-31 0.06087946 0.07710897 0.081108997 0.08230684 0.063854328
## 2010-04-30 0.01546998 0.02242518 -0.001661812 0.05678476 -0.028045554
## 2010-05-31 -0.07945456 -0.07392366 -0.093935673 -0.07536655 -0.111928092
## 2010-06-30 -0.05174120 -0.05975688 -0.013986635 -0.07743393 -0.020619541
## 2010-07-31 0.06830097 0.07258303 0.109324837 0.06730911 0.116104123
## TLT IYR GLD
## 2010-02-28 -0.003424170 0.05457073 0.032748219
## 2010-03-31 -0.020573618 0.09748538 -0.004386396
## 2010-04-30 0.033218702 0.06388075 0.058834363
## 2010-05-31 0.051083627 -0.05683529 0.030513147
## 2010-06-30 0.057977479 -0.04670110 0.023553189
## 2010-07-31 -0.009462935 0.09404778 -0.050871157
ff_url <- paste0(
"https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/",
"ftp/F-F_Research_Data_Factors_CSV.zip"
)
tmp <- tempfile(fileext = ".zip")
download.file(ff_url, tmp, mode = "wb", quiet = TRUE)
# Check actual filename inside the zip (avoids hard-coding a name that may change)
zip_contents <- unzip(tmp, list = TRUE)
csv_name <- zip_contents$Name[1]
cat("File inside zip:", csv_name, "\n")
## File inside zip: F-F_Research_Data_Factors.csv
# Read the CSV (skip the 3 header rows)
ff_raw <- read.csv(unz(tmp, csv_name),
skip = 3, header = TRUE, stringsAsFactors = FALSE)
# Keep only monthly rows (date column is 6 digits: YYYYMM)
ff_monthly <- ff_raw[nchar(trimws(ff_raw[, 1])) == 6, ]
# Rename columns
colnames(ff_monthly)[1:5] <- c("Date", "Mkt_RF", "SMB", "HML", "RF")
# Convert to numeric and scale from percentage to decimal
ff_monthly$Date <- as.character(trimws(ff_monthly$Date))
ff_monthly$Mkt_RF <- as.numeric(ff_monthly$Mkt_RF) / 100
ff_monthly$SMB <- as.numeric(ff_monthly$SMB) / 100
ff_monthly$HML <- as.numeric(ff_monthly$HML) / 100
ff_monthly$RF <- as.numeric(ff_monthly$RF) / 100
# Create end-of-month Date column
ff_monthly$Date <- as.Date(paste0(ff_monthly$Date, "01"), format = "%Y%m%d")
ff_monthly$Date <- as.Date(format(ff_monthly$Date + 31, "%Y-%m-01")) - 1
# Drop any rows where Date conversion failed
ff_monthly <- ff_monthly[!is.na(ff_monthly$Date), ]
# Convert to xts and filter to our window
ff_xts <- xts(ff_monthly[, c("Mkt_RF", "SMB", "HML", "RF")],
order.by = ff_monthly$Date)
ff_xts <- ff_xts["2010/2025"]
head(ff_xts)
## Mkt_RF SMB HML RF
## 2010-01-31 -0.0335 0.0043 0.0033 0e+00
## 2010-02-28 0.0339 0.0118 0.0318 0e+00
## 2010-03-31 0.0630 0.0146 0.0219 1e-04
## 2010-04-30 0.0199 0.0484 0.0296 1e-04
## 2010-05-31 -0.0790 0.0013 -0.0248 1e-04
## 2010-06-30 -0.0556 -0.0179 -0.0473 1e-04
# Align by date index (inner join keeps only overlapping dates)
merged_data <- merge(monthly_returns, ff_xts, join = "inner")
merged_data <- na.omit(merged_data)
cat("Rows in merged dataset:", nrow(merged_data), "\n")
## Rows in merged dataset: 191
cat("Date range:",
as.character(index(merged_data)[1]),
"to",
as.character(tail(index(merged_data), 1)), "\n")
## Date range: 2010-02-28 to 2025-12-31
head(merged_data)
## SPY QQQ EEM IWM EFA
## 2010-02-28 0.03119537 0.04603911 0.017763489 0.04475136 0.002667386
## 2010-03-31 0.06087946 0.07710897 0.081108997 0.08230684 0.063854328
## 2010-04-30 0.01546998 0.02242518 -0.001661812 0.05678476 -0.028045554
## 2010-05-31 -0.07945456 -0.07392366 -0.093935673 -0.07536655 -0.111928092
## 2010-06-30 -0.05174120 -0.05975688 -0.013986635 -0.07743393 -0.020619541
## 2010-07-31 0.06830097 0.07258303 0.109324837 0.06730911 0.116104123
## TLT IYR GLD Mkt_RF SMB HML RF
## 2010-02-28 -0.003424170 0.05457073 0.032748219 0.0339 0.0118 0.0318 0e+00
## 2010-03-31 -0.020573618 0.09748538 -0.004386396 0.0630 0.0146 0.0219 1e-04
## 2010-04-30 0.033218702 0.06388075 0.058834363 0.0199 0.0484 0.0296 1e-04
## 2010-05-31 0.051083627 -0.05683529 0.030513147 -0.0790 0.0013 -0.0248 1e-04
## 2010-06-30 0.057977479 -0.04670110 0.023553189 -0.0556 -0.0179 -0.0473 1e-04
## 2010-07-31 -0.009462935 0.09404778 -0.050871157 0.0692 0.0022 -0.0050 1e-04
The CAPM model is: \[R_{i,t} - R_{f,t} = \alpha_i + \beta_i (R_{m,t} - R_{f,t}) + \varepsilon_{i,t}\]
The CAPM-implied covariance matrix is: \[\Sigma = \beta \beta^\top \sigma^2_{m} + D\] where \(D\) is a diagonal matrix of residual variances.
window_start <- "2020-03-01"
window_end <- "2025-02-28"
etf_window <- merged_data[paste0(window_start, "/", window_end), tickers]
ff_window <- merged_data[paste0(window_start, "/", window_end),
c("Mkt_RF", "SMB", "HML", "RF")]
cat("Window rows:", nrow(etf_window), "\n") # should be ~60
## Window rows: 60
# Excess returns
excess_etf <- etf_window - as.numeric(ff_window[, "RF"])
mkt_rf <- ff_window[, "Mkt_RF"]
# Estimate CAPM betas
betas_capm <- sapply(tickers, function(tk) {
fit <- lm(as.numeric(excess_etf[, tk]) ~ as.numeric(mkt_rf))
coef(fit)[2]
})
# Residual variances
resid_var_capm <- sapply(tickers, function(tk) {
fit <- lm(as.numeric(excess_etf[, tk]) ~ as.numeric(mkt_rf))
var(resid(fit))
})
# Market variance
mkt_var <- var(as.numeric(mkt_rf))
# CAPM covariance matrix
sigma_capm <- betas_capm %o% betas_capm * mkt_var + diag(resid_var_capm)
rownames(sigma_capm) <- colnames(sigma_capm) <- tickers
cat("CAPM Covariance Matrix (annualized %):\n")
## CAPM Covariance Matrix (annualized %):
print(round(sigma_capm * 12 * 100, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## SPY 3.1480 3.4605 2.2658 3.8589 2.6826 1.0773 3.2659 0.5681
## QQQ 3.4605 4.5496 2.5223 4.2957 2.9863 1.1992 3.6356 0.6324
## EEM 2.2658 2.5223 3.2545 2.8127 1.9553 0.7852 2.3805 0.4141
## IWM 3.8589 4.2957 2.8127 5.9793 3.3301 1.3373 4.0542 0.7052
## EFA 2.6826 2.9863 1.9553 3.3301 3.1422 0.9297 2.8183 0.4903
## TLT 1.0773 1.1992 0.7852 1.3373 0.9297 2.3214 1.1318 0.1969
## IYR 3.2659 3.6356 2.3805 4.0542 2.8183 1.1318 4.6658 0.5969
## GLD 0.5681 0.6324 0.4141 0.7052 0.4903 0.1969 0.5969 2.0799
The Minimum Variance Portfolio solves: \[\min_w \; w^\top \Sigma w \quad \text{s.t.} \quad \mathbf{1}^\top w = 1, \quad w \ge 0\]
mvp_weights <- function(sigma_mat, allow_short = FALSE) {
n <- nrow(sigma_mat)
ones <- rep(1, n)
if (!allow_short) {
# Quadratic programming with non-negativity constraints
Dmat <- 2 * sigma_mat
dvec <- rep(0, n)
Amat <- cbind(ones, diag(n))
bvec <- c(1, rep(0, n))
sol <- solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
w <- sol$solution
} else {
# Analytical solution (short sales allowed)
w <- solve(sigma_mat, ones) /
as.numeric(t(ones) %*% solve(sigma_mat, ones))
}
names(w) <- rownames(sigma_mat)
w
}
# CAPM-based MVP weights (long-only)
w_capm <- mvp_weights(sigma_capm)
cat("MVP Weights from CAPM Covariance:\n")
## MVP Weights from CAPM Covariance:
print(round(w_capm, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.0000 0.0000 0.1401 0.0000 0.0838 0.3425 0.0000 0.4336
barplot(w_capm, main = "MVP Weights — CAPM", col = "steelblue",
ylab = "Weight", las = 2)
ret_mar2025 <- merged_data["2025-03", tickers]
if (nrow(ret_mar2025) == 0) {
cat("March 2025 not in dataset — check download end date.\n")
} else {
r_mar2025 <- as.numeric(ret_mar2025)
mvp_ret_capm_mar <- sum(w_capm * r_mar2025)
cat("Realized MVP Return (CAPM) — March 2025:",
round(mvp_ret_capm_mar * 100, 4), "%\n")
}
## Realized MVP Return (CAPM) — March 2025: 3.8576 %
The FF3 model is: \[R_{i,t} - R_{f,t} = \alpha_i + \beta_{i,m}(R_m - R_f) + \beta_{i,s}\text{SMB}_t + \beta_{i,h}\text{HML}_t + \varepsilon_{i,t}\]
The FF3-implied covariance matrix is: \[\Sigma = B \Sigma_F B^\top + D\] where \(B\) is the \(n \times 3\) factor-loading matrix, \(\Sigma_F\) is the \(3 \times 3\) factor covariance, and \(D\) is diagonal residual variance.
factors <- as.data.frame(ff_window[, c("Mkt_RF", "SMB", "HML")])
# Factor betas for each ETF
B_ff3 <- t(sapply(tickers, function(tk) {
fit <- lm(as.numeric(excess_etf[, tk]) ~ ., data = factors)
coef(fit)[-1] # drop intercept
}))
colnames(B_ff3) <- c("Mkt_RF", "SMB", "HML")
# Factor covariance matrix (3×3)
Sigma_F <- cov(factors)
# Residual variances from FF3
resid_var_ff3 <- sapply(tickers, function(tk) {
fit <- lm(as.numeric(excess_etf[, tk]) ~ ., data = factors)
var(resid(fit))
})
# FF3 covariance matrix
sigma_ff3 <- B_ff3 %*% Sigma_F %*% t(B_ff3) + diag(resid_var_ff3)
rownames(sigma_ff3) <- colnames(sigma_ff3) <- tickers
# FF3-based MVP weights (long-only)
w_ff3 <- mvp_weights(sigma_ff3)
cat("MVP Weights from FF3 Covariance:\n")
## MVP Weights from FF3 Covariance:
print(round(w_ff3, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.0000 0.0000 0.1565 0.0000 0.0821 0.3391 0.0000 0.4223
barplot(w_ff3, main = "MVP Weights — FF3", col = "coral",
ylab = "Weight", las = 2)
if (nrow(ret_mar2025) > 0) {
r_mar2025 <- as.numeric(ret_mar2025)
mvp_ret_capm_mar <- sum(w_capm * r_mar2025)
mvp_ret_ff3_mar <- sum(w_ff3 * r_mar2025)
results_mar <- data.frame(
Model = c("CAPM", "FF3"),
Return_pct = round(c(mvp_ret_capm_mar, mvp_ret_ff3_mar) * 100, 4)
)
cat("=== Realized MVP Returns — March 2025 ===\n")
print(results_mar)
} else {
cat("March 2025 returns not available.\n")
}
## === Realized MVP Returns — March 2025 ===
## Model Return_pct
## 1 CAPM 3.8576
## 2 FF3 3.7730
For April 2025, the covariance is re-estimated using the window 2020/04–2025/03.
window2_start <- "2020-04-01"
window2_end <- "2025-03-31"
etf_w2 <- merged_data[paste0(window2_start, "/", window2_end), tickers]
ff_w2 <- merged_data[paste0(window2_start, "/", window2_end),
c("Mkt_RF", "SMB", "HML", "RF")]
if (nrow(etf_w2) < 60) {
cat("Rolling window for April 2025 has only", nrow(etf_w2),
"months — March 2025 data may be needed.\n")
} else {
excess_etf2 <- etf_w2 - as.numeric(ff_w2[, "RF"])
mkt_rf2 <- ff_w2[, "Mkt_RF"]
factors2 <- as.data.frame(ff_w2[, c("Mkt_RF", "SMB", "HML")])
# ---- CAPM ----
betas2_capm <- sapply(tickers, function(tk) {
coef(lm(as.numeric(excess_etf2[, tk]) ~ as.numeric(mkt_rf2)))[2]
})
resid2_capm <- sapply(tickers, function(tk) {
var(resid(lm(as.numeric(excess_etf2[, tk]) ~ as.numeric(mkt_rf2))))
})
sigma2_capm <- betas2_capm %o% betas2_capm * var(as.numeric(mkt_rf2)) +
diag(resid2_capm)
rownames(sigma2_capm) <- colnames(sigma2_capm) <- tickers
w2_capm <- mvp_weights(sigma2_capm)
# ---- FF3 ----
B2_ff3 <- t(sapply(tickers, function(tk) {
coef(lm(as.numeric(excess_etf2[, tk]) ~ ., data = factors2))[-1]
}))
Sigma2_F <- cov(factors2)
resid2_ff3 <- sapply(tickers, function(tk) {
var(resid(lm(as.numeric(excess_etf2[, tk]) ~ ., data = factors2)))
})
sigma2_ff3 <- B2_ff3 %*% Sigma2_F %*% t(B2_ff3) + diag(resid2_ff3)
rownames(sigma2_ff3) <- colnames(sigma2_ff3) <- tickers
w2_ff3 <- mvp_weights(sigma2_ff3)
# April 2025 actual returns
ret_apr2025 <- merged_data["2025-04", tickers]
if (nrow(ret_apr2025) == 0) {
cat("April 2025 not yet in dataset.\n")
cat("New CAPM weights (for April 2025):\n"); print(round(w2_capm, 4))
cat("New FF3 weights (for April 2025):\n"); print(round(w2_ff3, 4))
} else {
r_apr2025 <- as.numeric(ret_apr2025)
mvp_ret2_capm <- sum(w2_capm * r_apr2025)
mvp_ret2_ff3 <- sum(w2_ff3 * r_apr2025)
results_apr <- data.frame(
Model = c("CAPM", "FF3"),
Return_pct = round(c(mvp_ret2_capm, mvp_ret2_ff3) * 100, 4)
)
cat("=== Realized MVP Returns — April 2025 ===\n")
print(results_apr)
}
}
## === Realized MVP Returns — April 2025 ===
## Model Return_pct
## 1 CAPM 2.1839
## 2 FF3 2.1333
weight_df <- data.frame(
Ticker = tickers,
CAPM_Weight = round(w_capm, 4),
FF3_Weight = round(w_ff3, 4)
)
knitr::kable(weight_df,
caption = "MVP Optimal Weights: CAPM vs FF3 (2020/03–2025/02)")
| Ticker | CAPM_Weight | FF3_Weight | |
|---|---|---|---|
| SPY | SPY | 0.0000 | 0.0000 |
| QQQ | QQQ | 0.0000 | 0.0000 |
| EEM | EEM | 0.1401 | 0.1565 |
| IWM | IWM | 0.0000 | 0.0000 |
| EFA | EFA | 0.0838 | 0.0821 |
| TLT | TLT | 0.3425 | 0.3391 |
| IYR | IYR | 0.0000 | 0.0000 |
| GLD | GLD | 0.4336 | 0.4223 |
Textbook questions (Part II) are answered separately in this document.