Setup: Install & Load Libraries

# Install any missing packages automatically
pkgs <- c("quantmod", "PerformanceAnalytics", "xts", "zoo",
          "quadprog", "tidyverse", "lubridate")
pkgs_missing <- pkgs[!pkgs %in% installed.packages()[, "Package"]]
if (length(pkgs_missing)) install.packages(pkgs_missing)

library(quantmod)
library(PerformanceAnalytics)
library(xts)
library(zoo)
library(quadprog)
library(tidyverse)
library(lubridate)

Q1 — Download ETF Daily Adjusted Prices (2010–2025)

tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")

getSymbols(tickers,
           src         = "yahoo",
           from        = "2010-01-01",
           to          = "2025-12-31",
           auto.assign = TRUE)
## [1] "SPY" "QQQ" "EEM" "IWM" "EFA" "TLT" "IYR" "GLD"
# Extract adjusted prices and merge into one xts object
adj_prices <- do.call(merge, lapply(tickers, function(tk) Ad(get(tk))))
colnames(adj_prices) <- tickers

head(adj_prices)
##                 SPY      QQQ      EEM      IWM      EFA      TLT      IYR
## 2010-01-04 84.79638 40.29078 30.35151 51.36657 35.12843 56.13521 26.76810
## 2010-01-05 85.02084 40.29078 30.57181 51.18992 35.15940 56.49770 26.83238
## 2010-01-06 85.08070 40.04774 30.63577 51.14177 35.30801 55.74138 26.82070
## 2010-01-07 85.43985 40.07381 30.45810 51.51911 35.17179 55.83516 27.06029
## 2010-01-08 85.72417 40.40362 30.69972 51.80010 35.45043 55.81015 26.87914
## 2010-01-11 85.84389 40.23873 30.63577 51.59135 35.74147 55.50389 27.00767
##               GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
tail(adj_prices)
##                 SPY      QQQ   EEM      IWM   EFA      TLT      IYR    GLD
## 2025-12-22 682.9648 618.4302 54.01 253.1297 95.70 86.39350 93.34799 408.23
## 2025-12-23 686.0863 621.3265 54.31 251.6324 96.29 86.53194 93.27818 413.64
## 2025-12-24 688.4997 623.1443 54.42 252.2613 96.41 87.05609 93.95628 411.93
## 2025-12-26 688.4299 623.1043 54.80 250.9736 96.57 86.76929 94.05600 416.74
## 2025-12-29 685.9766 620.0881 54.66 249.4363 96.28 87.09565 94.23550 398.60
## 2025-12-30 685.1389 618.6499 54.88 247.5896 96.44 86.88796 94.44491 398.89

Q2 — Calculate Monthly Discrete Returns

# Convert daily adjusted prices to end-of-month prices
monthly_prices <- to.monthly(adj_prices, indexAt = "lastof", OHLC = FALSE)

# Discrete (simple) monthly returns: R_t = (P_t / P_{t-1}) - 1
monthly_returns <- ROC(monthly_prices, type = "discrete")
monthly_returns <- na.omit(monthly_returns)

head(monthly_returns)
##                    SPY         QQQ          EEM         IWM          EFA
## 2010-02-28  0.03119537  0.04603911  0.017763489  0.04475136  0.002667386
## 2010-03-31  0.06087946  0.07710897  0.081108997  0.08230684  0.063854328
## 2010-04-30  0.01546998  0.02242518 -0.001661812  0.05678476 -0.028045554
## 2010-05-31 -0.07945456 -0.07392366 -0.093935673 -0.07536655 -0.111928092
## 2010-06-30 -0.05174120 -0.05975688 -0.013986635 -0.07743393 -0.020619541
## 2010-07-31  0.06830097  0.07258303  0.109324837  0.06730911  0.116104123
##                     TLT         IYR          GLD
## 2010-02-28 -0.003424170  0.05457073  0.032748219
## 2010-03-31 -0.020573618  0.09748538 -0.004386396
## 2010-04-30  0.033218702  0.06388075  0.058834363
## 2010-05-31  0.051083627 -0.05683529  0.030513147
## 2010-06-30  0.057977479 -0.04670110  0.023553189
## 2010-07-31 -0.009462935  0.09404778 -0.050871157

Q3 — Download Fama-French 3-Factor Data

ff_url <- paste0(
  "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/",
  "ftp/F-F_Research_Data_Factors_CSV.zip"
)

tmp <- tempfile(fileext = ".zip")
download.file(ff_url, tmp, mode = "wb", quiet = TRUE)

# Check actual filename inside the zip (avoids hard-coding a name that may change)
zip_contents <- unzip(tmp, list = TRUE)
csv_name     <- zip_contents$Name[1]
cat("File inside zip:", csv_name, "\n")
## File inside zip: F-F_Research_Data_Factors.csv
# Read the CSV (skip the 3 header rows)
ff_raw <- read.csv(unz(tmp, csv_name),
                   skip = 3, header = TRUE, stringsAsFactors = FALSE)

# Keep only monthly rows (date column is 6 digits: YYYYMM)
ff_monthly <- ff_raw[nchar(trimws(ff_raw[, 1])) == 6, ]

# Rename columns
colnames(ff_monthly)[1:5] <- c("Date", "Mkt_RF", "SMB", "HML", "RF")

# Convert to numeric and scale from percentage to decimal
ff_monthly$Date   <- as.character(trimws(ff_monthly$Date))
ff_monthly$Mkt_RF <- as.numeric(ff_monthly$Mkt_RF) / 100
ff_monthly$SMB    <- as.numeric(ff_monthly$SMB)    / 100
ff_monthly$HML    <- as.numeric(ff_monthly$HML)    / 100
ff_monthly$RF     <- as.numeric(ff_monthly$RF)     / 100

# Create end-of-month Date column
ff_monthly$Date <- as.Date(paste0(ff_monthly$Date, "01"), format = "%Y%m%d")
ff_monthly$Date <- as.Date(format(ff_monthly$Date + 31, "%Y-%m-01")) - 1

# Drop any rows where Date conversion failed
ff_monthly <- ff_monthly[!is.na(ff_monthly$Date), ]

# Convert to xts and filter to our window
ff_xts <- xts(ff_monthly[, c("Mkt_RF", "SMB", "HML", "RF")],
              order.by = ff_monthly$Date)
ff_xts <- ff_xts["2010/2025"]

head(ff_xts)
##             Mkt_RF     SMB     HML    RF
## 2010-01-31 -0.0335  0.0043  0.0033 0e+00
## 2010-02-28  0.0339  0.0118  0.0318 0e+00
## 2010-03-31  0.0630  0.0146  0.0219 1e-04
## 2010-04-30  0.0199  0.0484  0.0296 1e-04
## 2010-05-31 -0.0790  0.0013 -0.0248 1e-04
## 2010-06-30 -0.0556 -0.0179 -0.0473 1e-04

Q4 — Merge Monthly ETF Returns with FF3 Factors

# Align by date index (inner join keeps only overlapping dates)
merged_data <- merge(monthly_returns, ff_xts, join = "inner")
merged_data <- na.omit(merged_data)

cat("Rows in merged dataset:", nrow(merged_data), "\n")
## Rows in merged dataset: 191
cat("Date range:",
    as.character(index(merged_data)[1]),
    "to",
    as.character(tail(index(merged_data), 1)), "\n")
## Date range: 2010-02-28 to 2025-12-31
head(merged_data)
##                    SPY         QQQ          EEM         IWM          EFA
## 2010-02-28  0.03119537  0.04603911  0.017763489  0.04475136  0.002667386
## 2010-03-31  0.06087946  0.07710897  0.081108997  0.08230684  0.063854328
## 2010-04-30  0.01546998  0.02242518 -0.001661812  0.05678476 -0.028045554
## 2010-05-31 -0.07945456 -0.07392366 -0.093935673 -0.07536655 -0.111928092
## 2010-06-30 -0.05174120 -0.05975688 -0.013986635 -0.07743393 -0.020619541
## 2010-07-31  0.06830097  0.07258303  0.109324837  0.06730911  0.116104123
##                     TLT         IYR          GLD  Mkt_RF     SMB     HML    RF
## 2010-02-28 -0.003424170  0.05457073  0.032748219  0.0339  0.0118  0.0318 0e+00
## 2010-03-31 -0.020573618  0.09748538 -0.004386396  0.0630  0.0146  0.0219 1e-04
## 2010-04-30  0.033218702  0.06388075  0.058834363  0.0199  0.0484  0.0296 1e-04
## 2010-05-31  0.051083627 -0.05683529  0.030513147 -0.0790  0.0013 -0.0248 1e-04
## 2010-06-30  0.057977479 -0.04670110  0.023553189 -0.0556 -0.0179 -0.0473 1e-04
## 2010-07-31 -0.009462935  0.09404778 -0.050871157  0.0692  0.0022 -0.0050 1e-04

Q5 — MVP via CAPM Covariance (60-month window: 2020/03–2025/02)

5a — Estimate Covariance from CAPM Residuals

The CAPM model is: \[R_{i,t} - R_{f,t} = \alpha_i + \beta_i (R_{m,t} - R_{f,t}) + \varepsilon_{i,t}\]

The CAPM-implied covariance matrix is: \[\Sigma = \beta \beta^\top \sigma^2_{m} + D\] where \(D\) is a diagonal matrix of residual variances.

window_start <- "2020-03-01"
window_end   <- "2025-02-28"

etf_window <- merged_data[paste0(window_start, "/", window_end), tickers]
ff_window  <- merged_data[paste0(window_start, "/", window_end),
                          c("Mkt_RF", "SMB", "HML", "RF")]

cat("Window rows:", nrow(etf_window), "\n")   # should be ~60
## Window rows: 60
# Excess returns
excess_etf <- etf_window - as.numeric(ff_window[, "RF"])
mkt_rf     <- ff_window[, "Mkt_RF"]

# Estimate CAPM betas
betas_capm <- sapply(tickers, function(tk) {
  fit <- lm(as.numeric(excess_etf[, tk]) ~ as.numeric(mkt_rf))
  coef(fit)[2]
})

# Residual variances
resid_var_capm <- sapply(tickers, function(tk) {
  fit <- lm(as.numeric(excess_etf[, tk]) ~ as.numeric(mkt_rf))
  var(resid(fit))
})

# Market variance
mkt_var <- var(as.numeric(mkt_rf))

# CAPM covariance matrix
sigma_capm <- betas_capm %o% betas_capm * mkt_var + diag(resid_var_capm)
rownames(sigma_capm) <- colnames(sigma_capm) <- tickers

cat("CAPM Covariance Matrix (annualized %):\n")
## CAPM Covariance Matrix (annualized %):
print(round(sigma_capm * 12 * 100, 4))
##        SPY    QQQ    EEM    IWM    EFA    TLT    IYR    GLD
## SPY 3.1480 3.4605 2.2658 3.8589 2.6826 1.0773 3.2659 0.5681
## QQQ 3.4605 4.5496 2.5223 4.2957 2.9863 1.1992 3.6356 0.6324
## EEM 2.2658 2.5223 3.2545 2.8127 1.9553 0.7852 2.3805 0.4141
## IWM 3.8589 4.2957 2.8127 5.9793 3.3301 1.3373 4.0542 0.7052
## EFA 2.6826 2.9863 1.9553 3.3301 3.1422 0.9297 2.8183 0.4903
## TLT 1.0773 1.1992 0.7852 1.3373 0.9297 2.3214 1.1318 0.1969
## IYR 3.2659 3.6356 2.3805 4.0542 2.8183 1.1318 4.6658 0.5969
## GLD 0.5681 0.6324 0.4141 0.7052 0.4903 0.1969 0.5969 2.0799

5b — Compute MVP Weights (CAPM)

The Minimum Variance Portfolio solves: \[\min_w \; w^\top \Sigma w \quad \text{s.t.} \quad \mathbf{1}^\top w = 1, \quad w \ge 0\]

mvp_weights <- function(sigma_mat, allow_short = FALSE) {
  n    <- nrow(sigma_mat)
  ones <- rep(1, n)

  if (!allow_short) {
    # Quadratic programming with non-negativity constraints
    Dmat <- 2 * sigma_mat
    dvec <- rep(0, n)
    Amat <- cbind(ones, diag(n))
    bvec <- c(1, rep(0, n))
    sol  <- solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
    w    <- sol$solution
  } else {
    # Analytical solution (short sales allowed)
    w <- solve(sigma_mat, ones) /
         as.numeric(t(ones) %*% solve(sigma_mat, ones))
  }
  names(w) <- rownames(sigma_mat)
  w
}

# CAPM-based MVP weights (long-only)
w_capm <- mvp_weights(sigma_capm)

cat("MVP Weights from CAPM Covariance:\n")
## MVP Weights from CAPM Covariance:
print(round(w_capm, 4))
##    SPY    QQQ    EEM    IWM    EFA    TLT    IYR    GLD 
## 0.0000 0.0000 0.1401 0.0000 0.0838 0.3425 0.0000 0.4336
barplot(w_capm, main = "MVP Weights — CAPM", col = "steelblue",
        ylab = "Weight", las = 2)

5c — MVP Monthly Return: March 2025 (CAPM)

ret_mar2025 <- merged_data["2025-03", tickers]

if (nrow(ret_mar2025) == 0) {
  cat("March 2025 not in dataset — check download end date.\n")
} else {
  r_mar2025        <- as.numeric(ret_mar2025)
  mvp_ret_capm_mar <- sum(w_capm * r_mar2025)
  cat("Realized MVP Return (CAPM) — March 2025:",
      round(mvp_ret_capm_mar * 100, 4), "%\n")
}
## Realized MVP Return (CAPM) — March 2025: 3.8576 %

Q6 — MVP via FF3-Factor Covariance (2020/03–2025/02)

The FF3 model is: \[R_{i,t} - R_{f,t} = \alpha_i + \beta_{i,m}(R_m - R_f) + \beta_{i,s}\text{SMB}_t + \beta_{i,h}\text{HML}_t + \varepsilon_{i,t}\]

The FF3-implied covariance matrix is: \[\Sigma = B \Sigma_F B^\top + D\] where \(B\) is the \(n \times 3\) factor-loading matrix, \(\Sigma_F\) is the \(3 \times 3\) factor covariance, and \(D\) is diagonal residual variance.

factors <- as.data.frame(ff_window[, c("Mkt_RF", "SMB", "HML")])

# Factor betas for each ETF
B_ff3 <- t(sapply(tickers, function(tk) {
  fit <- lm(as.numeric(excess_etf[, tk]) ~ ., data = factors)
  coef(fit)[-1]   # drop intercept
}))
colnames(B_ff3) <- c("Mkt_RF", "SMB", "HML")

# Factor covariance matrix (3×3)
Sigma_F <- cov(factors)

# Residual variances from FF3
resid_var_ff3 <- sapply(tickers, function(tk) {
  fit <- lm(as.numeric(excess_etf[, tk]) ~ ., data = factors)
  var(resid(fit))
})

# FF3 covariance matrix
sigma_ff3 <- B_ff3 %*% Sigma_F %*% t(B_ff3) + diag(resid_var_ff3)
rownames(sigma_ff3) <- colnames(sigma_ff3) <- tickers

# FF3-based MVP weights (long-only)
w_ff3 <- mvp_weights(sigma_ff3)

cat("MVP Weights from FF3 Covariance:\n")
## MVP Weights from FF3 Covariance:
print(round(w_ff3, 4))
##    SPY    QQQ    EEM    IWM    EFA    TLT    IYR    GLD 
## 0.0000 0.0000 0.1565 0.0000 0.0821 0.3391 0.0000 0.4223
barplot(w_ff3, main = "MVP Weights — FF3", col = "coral",
        ylab = "Weight", las = 2)


Q7 — Realized MVP Returns in March 2025

if (nrow(ret_mar2025) > 0) {
  r_mar2025 <- as.numeric(ret_mar2025)

  mvp_ret_capm_mar <- sum(w_capm * r_mar2025)
  mvp_ret_ff3_mar  <- sum(w_ff3  * r_mar2025)

  results_mar <- data.frame(
    Model      = c("CAPM", "FF3"),
    Return_pct = round(c(mvp_ret_capm_mar, mvp_ret_ff3_mar) * 100, 4)
  )
  cat("=== Realized MVP Returns — March 2025 ===\n")
  print(results_mar)
} else {
  cat("March 2025 returns not available.\n")
}
## === Realized MVP Returns — March 2025 ===
##   Model Return_pct
## 1  CAPM     3.8576
## 2   FF3     3.7730

Q8 — Realized MVP Return in April 2025 (Rolling 60-Month Window)

For April 2025, the covariance is re-estimated using the window 2020/04–2025/03.

window2_start <- "2020-04-01"
window2_end   <- "2025-03-31"

etf_w2 <- merged_data[paste0(window2_start, "/", window2_end), tickers]
ff_w2  <- merged_data[paste0(window2_start, "/", window2_end),
                      c("Mkt_RF", "SMB", "HML", "RF")]

if (nrow(etf_w2) < 60) {
  cat("Rolling window for April 2025 has only", nrow(etf_w2),
      "months — March 2025 data may be needed.\n")
} else {
  excess_etf2 <- etf_w2 - as.numeric(ff_w2[, "RF"])
  mkt_rf2     <- ff_w2[, "Mkt_RF"]
  factors2    <- as.data.frame(ff_w2[, c("Mkt_RF", "SMB", "HML")])

  # ---- CAPM ----
  betas2_capm <- sapply(tickers, function(tk) {
    coef(lm(as.numeric(excess_etf2[, tk]) ~ as.numeric(mkt_rf2)))[2]
  })
  resid2_capm <- sapply(tickers, function(tk) {
    var(resid(lm(as.numeric(excess_etf2[, tk]) ~ as.numeric(mkt_rf2))))
  })
  sigma2_capm <- betas2_capm %o% betas2_capm * var(as.numeric(mkt_rf2)) +
                 diag(resid2_capm)
  rownames(sigma2_capm) <- colnames(sigma2_capm) <- tickers
  w2_capm <- mvp_weights(sigma2_capm)

  # ---- FF3 ----
  B2_ff3 <- t(sapply(tickers, function(tk) {
    coef(lm(as.numeric(excess_etf2[, tk]) ~ ., data = factors2))[-1]
  }))
  Sigma2_F    <- cov(factors2)
  resid2_ff3  <- sapply(tickers, function(tk) {
    var(resid(lm(as.numeric(excess_etf2[, tk]) ~ ., data = factors2)))
  })
  sigma2_ff3  <- B2_ff3 %*% Sigma2_F %*% t(B2_ff3) + diag(resid2_ff3)
  rownames(sigma2_ff3) <- colnames(sigma2_ff3) <- tickers
  w2_ff3 <- mvp_weights(sigma2_ff3)

  # April 2025 actual returns
  ret_apr2025 <- merged_data["2025-04", tickers]

  if (nrow(ret_apr2025) == 0) {
    cat("April 2025 not yet in dataset.\n")
    cat("New CAPM weights (for April 2025):\n"); print(round(w2_capm, 4))
    cat("New FF3  weights (for April 2025):\n"); print(round(w2_ff3,  4))
  } else {
    r_apr2025     <- as.numeric(ret_apr2025)
    mvp_ret2_capm <- sum(w2_capm * r_apr2025)
    mvp_ret2_ff3  <- sum(w2_ff3  * r_apr2025)

    results_apr <- data.frame(
      Model      = c("CAPM", "FF3"),
      Return_pct = round(c(mvp_ret2_capm, mvp_ret2_ff3) * 100, 4)
    )
    cat("=== Realized MVP Returns — April 2025 ===\n")
    print(results_apr)
  }
}
## === Realized MVP Returns — April 2025 ===
##   Model Return_pct
## 1  CAPM     2.1839
## 2   FF3     2.1333

Summary Table

weight_df <- data.frame(
  Ticker      = tickers,
  CAPM_Weight = round(w_capm, 4),
  FF3_Weight  = round(w_ff3,  4)
)
knitr::kable(weight_df,
             caption = "MVP Optimal Weights: CAPM vs FF3 (2020/03–2025/02)")
MVP Optimal Weights: CAPM vs FF3 (2020/03–2025/02)
Ticker CAPM_Weight FF3_Weight
SPY SPY 0.0000 0.0000
QQQ QQQ 0.0000 0.0000
EEM EEM 0.1401 0.1565
IWM IWM 0.0000 0.0000
EFA EFA 0.0838 0.0821
TLT TLT 0.3425 0.3391
IYR IYR 0.0000 0.0000
GLD GLD 0.4336 0.4223

Textbook questions (Part II) are answered separately in this document.