library(quantmod)
library(xts)
library(PerformanceAnalytics)
library(quadprog)
library(knitr)
library(ggplot2)Download ETF daily data from Yahoo Finance for SPY, QQQ, EEM,
IWM, EFA, TLT, IYR, GLD from 2010 to 2025. We use the
quantmod package and adjusted prices.
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
# Download data from Yahoo Finance
getSymbols(tickers, src = "yahoo", from = "2010-01-01", to = "2025-12-31",
auto.assign = TRUE)## [1] "SPY" "QQQ" "EEM" "IWM" "EFA" "TLT" "IYR" "GLD"
# Extract adjusted prices into a single xts object
prices <- do.call(merge, lapply(tickers, function(t) Ad(get(t))))
colnames(prices) <- tickers
head(prices)## SPY QQQ EEM IWM EFA TLT IYR
## 2010-01-04 84.79638 40.29079 30.35151 51.36657 35.12844 56.13516 26.76811
## 2010-01-05 85.02083 40.29079 30.57181 51.18993 35.15940 56.49767 26.83238
## 2010-01-06 85.08070 40.04774 30.63576 51.14177 35.30801 55.74139 26.82069
## 2010-01-07 85.43985 40.07380 30.45810 51.51910 35.17178 55.83517 27.06027
## 2010-01-08 85.72420 40.40363 30.69973 51.80010 35.45043 55.81016 26.87913
## 2010-01-11 85.84389 40.23872 30.63576 51.59136 35.74147 55.50388 27.00768
## GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 2025-12-22 682.9648 618.4302 54.01 253.1297 95.70 86.39350 93.34799 408.23
## 2025-12-23 686.0863 621.3265 54.31 251.6324 96.29 86.53195 93.27818 413.64
## 2025-12-24 688.4997 623.1443 54.42 252.2613 96.41 87.05608 93.95628 411.93
## 2025-12-26 688.4299 623.1043 54.80 250.9736 96.57 86.76929 94.05600 416.74
## 2025-12-29 685.9766 620.0881 54.66 249.4363 96.28 87.09564 94.23550 398.60
## 2025-12-30 685.1389 618.6499 54.88 247.5896 96.44 86.88797 94.44491 398.89
## Dimensions: 4023 8
cat("Date range:", as.character(index(prices)[1]), "to",
as.character(index(prices)[nrow(prices)]), "\n")## Date range: 2010-01-04 to 2025-12-30
# Normalize prices to 100 for comparison
prices_norm <- prices / rep(as.numeric(prices[1, ]), each = nrow(prices)) * 100
plot.zoo(prices_norm, plot.type = "single", col = 1:8, lwd = 1.5,
main = "Normalized ETF Prices (Base = 100)",
xlab = "Date", ylab = "Normalized Price")
legend("topleft", legend = tickers, col = 1:8, lty = 1, lwd = 1.5,
cex = 0.8, ncol = 2)We convert daily adjusted prices to monthly and compute discrete returns: \[R_t = \frac{P_t - P_{t-1}}{P_{t-1}}\]
# Convert to monthly prices (end of month)
monthly_prices <- to.monthly(prices, indexAt = "lastof", OHLC = FALSE)
# Calculate discrete monthly returns
monthly_returns <- Return.calculate(monthly_prices, method = "discrete")
monthly_returns <- monthly_returns[-1, ] # Remove first NA row
head(monthly_returns)## SPY QQQ EEM IWM EFA
## 2010-02-28 0.03119479 0.04603816 0.017764202 0.04475112 0.002667738
## 2010-03-31 0.06087919 0.07710950 0.081108997 0.08230700 0.063853962
## 2010-04-30 0.01547051 0.02242527 -0.001661812 0.05678448 -0.028045888
## 2010-05-31 -0.07945464 -0.07392366 -0.093935800 -0.07536624 -0.111927859
## 2010-06-30 -0.05174110 -0.05975668 -0.013986637 -0.07743407 -0.020619162
## 2010-07-31 0.06830076 0.07258260 0.109324852 0.06730911 0.116103906
## TLT IYR GLD
## 2010-02-28 -0.00342417 0.05457035 0.032748219
## 2010-03-31 -0.02057355 0.09748526 -0.004386396
## 2010-04-30 0.03321862 0.06388096 0.058834363
## 2010-05-31 0.05108309 -0.05683523 0.030513147
## 2010-06-30 0.05797831 -0.04670136 0.023553189
## 2010-07-31 -0.00946352 0.09404828 -0.050871157
## SPY QQQ EEM IWM EFA
## 2025-07-31 0.023031543 0.024236854 0.006633454 0.0166829141 -0.020919628
## 2025-08-31 0.020519538 0.009539647 0.026770994 0.0719267543 0.045246876
## 2025-09-30 0.035620413 0.053762268 0.070998795 0.0317911543 0.020660248
## 2025-10-31 0.023837414 0.047803817 0.035580494 0.0176475324 0.011995297
## 2025-11-30 0.001949944 -0.015610297 -0.017721479 0.0102343663 0.007408238
## 2025-12-31 0.008267742 0.001579351 0.024767431 0.0004491716 0.031490000
## TLT IYR GLD
## 2025-07-31 -0.0113966251 0.0011608154 -0.006134551
## 2025-08-31 0.0001269506 0.0290893452 0.049874625
## 2025-09-30 0.0359098527 0.0005545544 0.117584158
## 2025-10-31 0.0138108172 -0.0249278342 0.035586671
## 2025-11-30 0.0027233086 0.0236636260 0.053678176
## 2025-12-31 -0.0187683309 -0.0136584167 0.028385092
## Monthly returns dimensions: 191 8
cat("Date range:", as.character(index(monthly_returns)[1]), "to",
as.character(index(monthly_returns)[nrow(monthly_returns)]), "\n")## Date range: 2010-02-28 to 2025-12-31
# Summary statistics
stats <- data.frame(
Mean = round(colMeans(monthly_returns) * 100, 3),
SD = round(apply(monthly_returns, 2, sd) * 100, 3),
Min = round(apply(monthly_returns, 2, min) * 100, 3),
Max = round(apply(monthly_returns, 2, max) * 100, 3)
)
kable(stats, caption = "Monthly Return Statistics (%)")| Mean | SD | Min | Max | |
|---|---|---|---|---|
| SPY | 1.214 | 4.134 | -12.487 | 12.698 |
| QQQ | 1.606 | 4.967 | -13.596 | 14.974 |
| EEM | 0.500 | 5.152 | -17.895 | 16.268 |
| IWM | 1.019 | 5.644 | -21.477 | 18.244 |
| EFA | 0.671 | 4.477 | -14.107 | 14.269 |
| TLT | 0.291 | 3.941 | -9.424 | 13.206 |
| IYR | 0.807 | 4.817 | -19.632 | 13.190 |
| GLD | 0.799 | 4.552 | -11.062 | 12.275 |
We download the Fama-French 3-factor data (Mkt-RF, SMB, HML) from Ken French’s website and convert from percentage to decimal.
# Download FF3 factors from Ken French's website
ff3_url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
temp <- tempfile()
download.file(ff3_url, temp, mode = "wb")
unzip(temp, exdir = tempdir())
# Read the CSV file
ff3_file <- list.files(tempdir(), pattern = "F-F_Research_Data_Factors", full.names = TRUE)
ff3_file <- ff3_file[grepl("\\.CSV$", ff3_file, ignore.case = TRUE)]
# Read the file, skipping header rows
ff3_raw <- read.csv(ff3_file[1], skip = 3, header = FALSE, stringsAsFactors = FALSE)
colnames(ff3_raw) <- c("Date", "Mkt_RF", "SMB", "HML", "RF")
# Keep only monthly data (6-digit dates: YYYYMM)
ff3_raw <- ff3_raw[nchar(trimws(ff3_raw$Date)) == 6, ]
ff3_raw <- ff3_raw[!is.na(as.numeric(ff3_raw$Mkt_RF)), ]
# Convert to numeric
ff3_raw$Mkt_RF <- as.numeric(ff3_raw$Mkt_RF)
ff3_raw$SMB <- as.numeric(ff3_raw$SMB)
ff3_raw$HML <- as.numeric(ff3_raw$HML)
ff3_raw$RF <- as.numeric(ff3_raw$RF)
# Convert from percentage to decimal
ff3_raw$Mkt_RF <- ff3_raw$Mkt_RF / 100
ff3_raw$SMB <- ff3_raw$SMB / 100
ff3_raw$HML <- ff3_raw$HML / 100
ff3_raw$RF <- ff3_raw$RF / 100
# Create date column (end of month)
ff3_raw$DateParsed <- as.Date(paste0(ff3_raw$Date, "01"), format = "%Y%m%d")
ff3_raw$DateParsed <- as.Date(as.yearmon(ff3_raw$DateParsed), frac = 1)
# Convert to xts
ff3 <- xts(ff3_raw[, c("Mkt_RF", "SMB", "HML", "RF")],
order.by = ff3_raw$DateParsed)
# Filter to our period
ff3 <- ff3["2010/2025"]
head(ff3)## Mkt_RF SMB HML RF
## 2010-01-31 -0.0335 0.0043 0.0033 0e+00
## 2010-02-28 0.0339 0.0118 0.0318 0e+00
## 2010-03-31 0.0630 0.0146 0.0219 1e-04
## 2010-04-30 0.0199 0.0484 0.0296 1e-04
## 2010-05-31 -0.0790 0.0013 -0.0248 1e-04
## 2010-06-30 -0.0556 -0.0179 -0.0473 1e-04
## Mkt_RF SMB HML RF
## 2025-07-31 0.0198 0.0027 -0.0127 0.0034
## 2025-08-31 0.0184 0.0387 0.0442 0.0038
## 2025-09-30 0.0339 -0.0185 -0.0105 0.0033
## 2025-10-31 0.0196 -0.0055 -0.0310 0.0037
## 2025-11-30 -0.0013 0.0038 0.0376 0.0030
## 2025-12-31 -0.0036 -0.0106 0.0242 0.0034
## FF3 dimensions: 192 4
library(zoo)
# Convert both to zoo with yearmon index for proper merging
ret_zoo <- zoo(coredata(monthly_returns), order.by = as.yearmon(index(monthly_returns)))
ff3_zoo <- zoo(coredata(ff3), order.by = as.yearmon(index(ff3)))
# Merge by yearmon
merged_zoo <- merge(ret_zoo, ff3_zoo, all = FALSE)
# Convert back to xts
merged_data <- as.xts(merged_zoo)
colnames(merged_data) <- c(tickers, "Mkt_RF", "SMB", "HML", "RF")
head(merged_data)## SPY QQQ EEM IWM EFA
## Feb 2010 0.03119479 0.04603816 0.017764202 0.04475112 0.002667738
## Mar 2010 0.06087919 0.07710950 0.081108997 0.08230700 0.063853962
## Apr 2010 0.01547051 0.02242527 -0.001661812 0.05678448 -0.028045888
## May 2010 -0.07945464 -0.07392366 -0.093935800 -0.07536624 -0.111927859
## Jun 2010 -0.05174110 -0.05975668 -0.013986637 -0.07743407 -0.020619162
## Jul 2010 0.06830076 0.07258260 0.109324852 0.06730911 0.116103906
## TLT IYR GLD Mkt_RF SMB HML RF
## Feb 2010 -0.00342417 0.05457035 0.032748219 0.0339 0.0118 0.0318 0e+00
## Mar 2010 -0.02057355 0.09748526 -0.004386396 0.0630 0.0146 0.0219 1e-04
## Apr 2010 0.03321862 0.06388096 0.058834363 0.0199 0.0484 0.0296 1e-04
## May 2010 0.05108309 -0.05683523 0.030513147 -0.0790 0.0013 -0.0248 1e-04
## Jun 2010 0.05797831 -0.04670136 0.023553189 -0.0556 -0.0179 -0.0473 1e-04
## Jul 2010 -0.00946352 0.09404828 -0.050871157 0.0692 0.0022 -0.0050 1e-04
## SPY QQQ EEM IWM EFA
## Jul 2025 0.023031543 0.024236854 0.006633454 0.0166829141 -0.020919628
## Aug 2025 0.020519538 0.009539647 0.026770994 0.0719267543 0.045246876
## Sep 2025 0.035620413 0.053762268 0.070998795 0.0317911543 0.020660248
## Oct 2025 0.023837414 0.047803817 0.035580494 0.0176475324 0.011995297
## Nov 2025 0.001949944 -0.015610297 -0.017721479 0.0102343663 0.007408238
## Dec 2025 0.008267742 0.001579351 0.024767431 0.0004491716 0.031490000
## TLT IYR GLD Mkt_RF SMB HML
## Jul 2025 -0.0113966251 0.0011608154 -0.006134551 0.0198 0.0027 -0.0127
## Aug 2025 0.0001269506 0.0290893452 0.049874625 0.0184 0.0387 0.0442
## Sep 2025 0.0359098527 0.0005545544 0.117584158 0.0339 -0.0185 -0.0105
## Oct 2025 0.0138108172 -0.0249278342 0.035586671 0.0196 -0.0055 -0.0310
## Nov 2025 0.0027233086 0.0236636260 0.053678176 -0.0013 0.0038 0.0376
## Dec 2025 -0.0187683309 -0.0136584167 0.028385092 -0.0036 -0.0106 0.0242
## RF
## Jul 2025 0.0034
## Aug 2025 0.0038
## Sep 2025 0.0033
## Oct 2025 0.0037
## Nov 2025 0.0030
## Dec 2025 0.0034
## Merged data dimensions: 191 12
cat("Date range:", as.character(index(merged_data)[1]), "to",
as.character(index(merged_data)[nrow(merged_data)]), "\n")## Date range: Feb 2010 to Dec 2025
Using the CAPM model, we estimate the covariance matrix for the 8-asset portfolio from past 60-month returns (2020/03 - 2025/02), then compute the Minimum Variance Portfolio (MVP).
CAPM Model: \(R_i - R_f = \alpha_i + \beta_i (R_m - R_f) + \epsilon_i\)
The CAPM-based covariance matrix: \(\Sigma = \beta \beta' \sigma_m^2 + D\) where \(D\) is the diagonal matrix of residual variances.
# Define the 60-month estimation window
est_start <- "2020-03"
est_end <- "2025-02"
# Subset merged data for estimation period
est_data <- merged_data[paste0(est_start, "/", est_end)]
cat("Estimation window observations:", nrow(est_data), "\n")## Estimation window observations: 60
# Extract ETF excess returns and market factor
rf <- est_data[, "RF"]
mkt_rf <- est_data[, "Mkt_RF"]
# Calculate excess returns for each ETF
excess_returns <- est_data[, tickers] - as.numeric(rf)
# CAPM regression for each ETF
capm_betas <- numeric(length(tickers))
capm_alphas <- numeric(length(tickers))
capm_residual_var <- numeric(length(tickers))
names(capm_betas) <- names(capm_alphas) <- names(capm_residual_var) <- tickers
for (i in seq_along(tickers)) {
fit <- lm(as.numeric(excess_returns[, i]) ~ as.numeric(mkt_rf))
capm_alphas[i] <- coef(fit)[1]
capm_betas[i] <- coef(fit)[2]
capm_residual_var[i] <- var(residuals(fit))
}
capm_results <- data.frame(
Alpha = round(capm_alphas * 100, 4),
Beta = round(capm_betas, 4),
Residual_Var = round(capm_residual_var * 10000, 4)
)
kable(capm_results, caption = "CAPM Regression Results")| Alpha | Beta | Residual_Var | |
|---|---|---|---|
| SPY | 0.0623 | 0.9552 | 0.3287 |
| QQQ | 0.2636 | 1.0634 | 5.8119 |
| EEM | -0.6243 | 0.6963 | 13.3578 |
| IWM | -0.6455 | 1.1858 | 9.9075 |
| EFA | -0.3805 | 0.8243 | 6.8931 |
| TLT | -1.1563 | 0.3310 | 16.2335 |
| IYR | -0.8011 | 1.0036 | 10.2892 |
| GLD | 0.6293 | 0.1746 | 16.4671 |
# CAPM-based covariance matrix
sigma2_mkt <- as.numeric(var(mkt_rf))
beta_vec <- matrix(capm_betas, ncol = 1)
D_capm <- diag(capm_residual_var)
Sigma_capm <- beta_vec %*% t(beta_vec) * sigma2_mkt + D_capm
colnames(Sigma_capm) <- rownames(Sigma_capm) <- tickers
cat("\nCAPM-based Covariance Matrix:\n")##
## CAPM-based Covariance Matrix:
## SPY QQQ EEM IWM EFA TLT IYR GLD
## SPY 26.2336 28.8373 18.8819 32.1577 22.3551 8.9775 27.2157 4.7343
## QQQ 28.8373 37.9136 21.0193 35.7979 24.8856 9.9937 30.2964 5.2702
## EEM 18.8819 21.0193 27.1207 23.4396 16.2945 6.5437 19.8373 3.4508
## IWM 32.1577 35.7979 23.4396 49.8272 27.7510 11.1444 33.7848 5.8770
## EFA 22.3551 24.8856 16.2945 27.7510 26.1848 7.7473 23.4862 4.0855
## TLT 8.9775 9.9937 6.5437 11.1444 7.7473 19.3447 9.4318 1.6407
## IYR 27.2157 30.2964 19.8373 33.7848 23.4862 9.4318 38.8819 4.9738
## GLD 4.7343 5.2702 3.4508 5.8770 4.0855 1.6407 4.9738 17.3323
# Compute MVP weights using quadratic programming
# MVP: minimize w'Σw subject to sum(w) = 1
# solve.QP: min(-d'b + 1/2 b'Db) subject to A'b >= b0
n_assets <- length(tickers)
Dmat <- 2 * Sigma_capm
dvec <- rep(0, n_assets)
Amat <- cbind(rep(1, n_assets), diag(n_assets))
bvec <- c(1, rep(0, n_assets)) # sum = 1, no short selling
meq <- 1 # first constraint is equality
mvp_capm <- solve.QP(Dmat, dvec, Amat, bvec, meq)
w_capm <- mvp_capm$solution
names(w_capm) <- tickers
cat("CAPM-based MVP Weights:\n")## CAPM-based MVP Weights:
mvp_weights_capm <- data.frame(
ETF = tickers,
Weight = round(w_capm, 6),
Weight_Pct = paste0(round(w_capm * 100, 2), "%")
)
kable(mvp_weights_capm, caption = "CAPM-based MVP Optimal Weights")| ETF | Weight | Weight_Pct | |
|---|---|---|---|
| SPY | SPY | 0.000000 | 0% |
| QQQ | QQQ | 0.000000 | 0% |
| EEM | EEM | 0.140091 | 14.01% |
| IWM | IWM | 0.000000 | 0% |
| EFA | EFA | 0.083844 | 8.38% |
| TLT | TLT | 0.342482 | 34.25% |
| IYR | IYR | 0.000000 | 0% |
| GLD | GLD | 0.433583 | 43.36% |
# MVP expected return and risk
mvp_ret_capm <- sum(w_capm * colMeans(est_data[, tickers]))
mvp_sd_capm <- sqrt(t(w_capm) %*% Sigma_capm %*% w_capm)
cat("CAPM MVP Monthly Expected Return:", round(mvp_ret_capm * 100, 4), "%\n")## CAPM MVP Monthly Expected Return: 0.3927 %
## CAPM MVP Monthly Std Dev: 2.9838 %
FF3 Model: \(R_i - R_f = \alpha_i + \beta_{i1}(Mkt-RF) + \beta_{i2}(SMB) + \beta_{i3}(HML) + \epsilon_i\)
The factor model covariance matrix: \(\Sigma = B \Sigma_f B' + D\)
# FF3 regression for each ETF
ff3_betas <- matrix(0, nrow = length(tickers), ncol = 3)
ff3_alphas <- numeric(length(tickers))
ff3_residual_var <- numeric(length(tickers))
rownames(ff3_betas) <- tickers
colnames(ff3_betas) <- c("Mkt_RF", "SMB", "HML")
smb <- est_data[, "SMB"]
hml <- est_data[, "HML"]
for (i in seq_along(tickers)) {
fit <- lm(as.numeric(excess_returns[, i]) ~
as.numeric(mkt_rf) + as.numeric(smb) + as.numeric(hml))
ff3_alphas[i] <- coef(fit)[1]
ff3_betas[i, ] <- coef(fit)[2:4]
ff3_residual_var[i] <- var(residuals(fit))
}
ff3_results <- data.frame(
Alpha = round(ff3_alphas * 100, 4),
Beta_Mkt = round(ff3_betas[, 1], 4),
Beta_SMB = round(ff3_betas[, 2], 4),
Beta_HML = round(ff3_betas[, 3], 4),
Residual_Var = round(ff3_residual_var * 10000, 4)
)
kable(ff3_results, caption = "FF 3-Factor Regression Results")| Alpha | Beta_Mkt | Beta_SMB | Beta_HML | Residual_Var | |
|---|---|---|---|---|---|
| SPY | -0.0106 | 0.9853 | -0.1487 | 0.0194 | 0.1213 |
| QQQ | 0.3220 | 1.0813 | -0.0890 | -0.3994 | 2.2366 |
| EEM | -0.6228 | 0.6794 | 0.0834 | 0.1476 | 12.8013 |
| IWM | -0.3042 | 1.0058 | 0.8895 | 0.2660 | 0.6215 |
| EFA | -0.4871 | 0.8477 | -0.1152 | 0.2169 | 5.8029 |
| TLT | -1.1213 | 0.3443 | -0.0658 | -0.2622 | 14.6810 |
| IYR | -0.8329 | 0.9953 | 0.0409 | 0.2032 | 9.3686 |
| GLD | 0.4817 | 0.2420 | -0.3330 | -0.0197 | 15.4200 |
# Factor covariance matrix
factors <- cbind(as.numeric(mkt_rf), as.numeric(smb), as.numeric(hml))
Sigma_f <- cov(factors)
# FF3-based covariance matrix: B * Sigma_f * B' + D
B <- ff3_betas
D_ff3 <- diag(ff3_residual_var)
Sigma_ff3 <- B %*% Sigma_f %*% t(B) + D_ff3
colnames(Sigma_ff3) <- rownames(Sigma_ff3) <- tickers
cat("\nFF3-based Covariance Matrix:\n")##
## FF3-based Covariance Matrix:
## SPY QQQ EEM IWM EFA TLT IYR GLD
## SPY 26.2336 28.8464 18.8100 31.0236 22.5723 8.9935 27.2170 5.1812
## QQQ 28.8464 37.9136 19.6376 32.4269 23.1390 12.3494 28.4823 5.8384
## EEM 18.8100 19.6376 27.1207 25.1136 16.8963 5.6300 20.5364 3.0842
## IWM 31.0236 32.4269 25.1136 49.8272 28.1948 8.8694 35.4628 2.9239
## EFA 22.5723 23.1390 16.8963 28.1948 26.1848 6.6072 24.3788 4.2840
## TLT 8.9935 12.3494 5.6300 8.8694 6.6072 19.3447 8.2368 2.0365
## IYR 27.2170 28.4823 20.5364 35.4628 24.3788 8.2368 38.8819 4.6983
## GLD 5.1812 5.8384 3.0842 2.9239 4.2840 2.0365 4.6983 17.3323
# Compute MVP weights using FF3 covariance matrix
Dmat_ff3 <- 2 * Sigma_ff3
mvp_ff3 <- solve.QP(Dmat_ff3, dvec, Amat, bvec, meq)
w_ff3 <- mvp_ff3$solution
names(w_ff3) <- tickers
cat("FF3-based MVP Weights:\n")## FF3-based MVP Weights:
mvp_weights_ff3 <- data.frame(
ETF = tickers,
Weight = round(w_ff3, 6),
Weight_Pct = paste0(round(w_ff3 * 100, 2), "%")
)
kable(mvp_weights_ff3, caption = "FF3-based MVP Optimal Weights")| ETF | Weight | Weight_Pct | |
|---|---|---|---|
| SPY | SPY | 0.000000 | 0% |
| QQQ | QQQ | 0.000000 | 0% |
| EEM | EEM | 0.156531 | 15.65% |
| IWM | IWM | 0.000000 | 0% |
| EFA | EFA | 0.082084 | 8.21% |
| TLT | TLT | 0.339124 | 33.91% |
| IYR | IYR | 0.000000 | 0% |
| GLD | GLD | 0.422261 | 42.23% |
# MVP expected return and risk
mvp_ret_ff3 <- sum(w_ff3 * colMeans(est_data[, tickers]))
mvp_sd_ff3 <- sqrt(t(w_ff3) %*% Sigma_ff3 %*% w_ff3)
cat("FF3 MVP Monthly Expected Return:", round(mvp_ret_ff3 * 100, 4), "%\n")## FF3 MVP Monthly Expected Return: 0.3883 %
## FF3 MVP Monthly Std Dev: 2.9738 %
We invest in the 8-asset portfolio in 2025/03 using optimal MVP weights from Q5 (CAPM) and Q6 (FF3).
# Get March 2025 actual returns
march_2025 <- merged_data["2025-03", tickers]
if (nrow(march_2025) > 0) {
march_ret <- as.numeric(march_2025[1, ])
names(march_ret) <- tickers
# Realized portfolio return using CAPM weights
realized_capm_march <- sum(w_capm * march_ret)
# Realized portfolio return using FF3 weights
realized_ff3_march <- sum(w_ff3 * march_ret)
cat("Individual ETF Returns in March 2025:\n")
march_df <- data.frame(
ETF = tickers,
Return_Pct = round(march_ret * 100, 4),
CAPM_Weight = round(w_capm, 4),
FF3_Weight = round(w_ff3, 4),
CAPM_Contrib = round(w_capm * march_ret * 100, 4),
FF3_Contrib = round(w_ff3 * march_ret * 100, 4)
)
kable(march_df, caption = "March 2025 Realized Returns and Contributions")
cat("\n\nRealized MVP Portfolio Return (March 2025):\n")
cat(" CAPM-based MVP:", round(realized_capm_march * 100, 4), "%\n")
cat(" FF3-based MVP:", round(realized_ff3_march * 100, 4), "%\n")
} else {
cat("March 2025 data not yet available in the dataset.\n")
cat("Please ensure data download includes March 2025.\n")
}## Individual ETF Returns in March 2025:
##
##
## Realized MVP Portfolio Return (March 2025):
## CAPM-based MVP: 3.8576 %
## FF3-based MVP: 3.773 %
We shift the 60-month window by one month (2020/04 - 2025/03), re-estimate the MVP, and compute April 2025 realized return.
# New 60-month estimation window
est_start2 <- "2020-04"
est_end2 <- "2025-03"
est_data2 <- merged_data[paste0(est_start2, "/", est_end2)]
cat("New estimation window observations:", nrow(est_data2), "\n")## New estimation window observations: 60
if (nrow(est_data2) >= 55) {
# Extract factors
rf2 <- est_data2[, "RF"]
mkt_rf2 <- est_data2[, "Mkt_RF"]
smb2 <- est_data2[, "SMB"]
hml2 <- est_data2[, "HML"]
excess_returns2 <- est_data2[, tickers] - as.numeric(rf2)
# --- CAPM for new window ---
capm_betas2 <- numeric(length(tickers))
capm_resvar2 <- numeric(length(tickers))
for (i in seq_along(tickers)) {
fit <- lm(as.numeric(excess_returns2[, i]) ~ as.numeric(mkt_rf2))
capm_betas2[i] <- coef(fit)[2]
capm_resvar2[i] <- var(residuals(fit))
}
sigma2_mkt2 <- as.numeric(var(mkt_rf2))
Sigma_capm2 <- matrix(capm_betas2, ncol=1) %*% matrix(capm_betas2, nrow=1) * sigma2_mkt2 + diag(capm_resvar2)
mvp_capm2 <- solve.QP(2 * Sigma_capm2, dvec, Amat, bvec, meq)
w_capm2 <- mvp_capm2$solution
# --- FF3 for new window ---
ff3_betas2 <- matrix(0, nrow = length(tickers), ncol = 3)
ff3_resvar2 <- numeric(length(tickers))
for (i in seq_along(tickers)) {
fit <- lm(as.numeric(excess_returns2[, i]) ~
as.numeric(mkt_rf2) + as.numeric(smb2) + as.numeric(hml2))
ff3_betas2[i, ] <- coef(fit)[2:4]
ff3_resvar2[i] <- var(residuals(fit))
}
factors2 <- cbind(as.numeric(mkt_rf2), as.numeric(smb2), as.numeric(hml2))
Sigma_f2 <- cov(factors2)
Sigma_ff3_2 <- ff3_betas2 %*% Sigma_f2 %*% t(ff3_betas2) + diag(ff3_resvar2)
mvp_ff3_2 <- solve.QP(2 * Sigma_ff3_2, dvec, Amat, bvec, meq)
w_ff3_2 <- mvp_ff3_2$solution
cat("Updated CAPM MVP Weights (window 2020/04-2025/03):\n")
print(round(w_capm2, 4))
cat("\nUpdated FF3 MVP Weights (window 2020/04-2025/03):\n")
print(round(w_ff3_2, 4))
} else {
cat("Not enough data for the new estimation window.\n")
}## Updated CAPM MVP Weights (window 2020/04-2025/03):
## [1] 0.0000 0.0000 0.1847 0.0000 0.1140 0.3046 0.0000 0.3967
##
## Updated FF3 MVP Weights (window 2020/04-2025/03):
## [1] 0.0000 0.0000 0.1949 0.0000 0.1051 0.3064 0.0000 0.3936
# Get April 2025 actual returns
april_2025 <- merged_data["2025-04", tickers]
if (nrow(april_2025) > 0 && exists("w_capm2")) {
april_ret <- as.numeric(april_2025[1, ])
names(april_ret) <- tickers
realized_capm_april <- sum(w_capm2 * april_ret)
realized_ff3_april <- sum(w_ff3_2 * april_ret)
cat("Individual ETF Returns in April 2025:\n")
april_df <- data.frame(
ETF = tickers,
Return_Pct = round(april_ret * 100, 4),
CAPM_Weight = round(w_capm2, 4),
FF3_Weight = round(w_ff3_2, 4),
CAPM_Contrib = round(w_capm2 * april_ret * 100, 4),
FF3_Contrib = round(w_ff3_2 * april_ret * 100, 4)
)
kable(april_df, caption = "April 2025 Realized Returns and Contributions")
cat("\n\nRealized MVP Portfolio Return (April 2025):\n")
cat(" CAPM-based MVP:", round(realized_capm_april * 100, 4), "%\n")
cat(" FF3-based MVP:", round(realized_ff3_april * 100, 4), "%\n")
} else {
cat("April 2025 data not yet available or estimation not complete.\n")
cat("The April 2025 return will need to be computed once data becomes available.\n")
}## Individual ETF Returns in April 2025:
##
##
## Realized MVP Portfolio Return (April 2025):
## CAPM-based MVP: 2.1839 %
## FF3-based MVP: 2.1333 %
## ========================================
## SUMMARY OF REALIZED MVP PORTFOLIO RETURNS
## ========================================
if (exists("realized_capm_march")) {
cat("March 2025:\n")
cat(" CAPM MVP:", round(realized_capm_march * 100, 4), "%\n")
cat(" FF3 MVP:", round(realized_ff3_march * 100, 4), "%\n\n")
}## March 2025:
## CAPM MVP: 3.8576 %
## FF3 MVP: 3.773 %
if (exists("realized_capm_april")) {
cat("April 2025:\n")
cat(" CAPM MVP:", round(realized_capm_april * 100, 4), "%\n")
cat(" FF3 MVP:", round(realized_ff3_april * 100, 4), "%\n")
}## April 2025:
## CAPM MVP: 2.1839 %
## FF3 MVP: 2.1333 %
Reference: Investments, by Bodie, Kane and Marcus, 12th ed.
Problem: Repeat the analysis of Table 5.4 for the six Fama-French portfolios for two sub-periods: January 1930 through June 1974, and July 1974 through December 2018. Are the distributions from the two sub-periods stable (i.e., consistent across the two periods)?
Solution:
We download the Fama-French 6 portfolios (sorted by size and book-to-market) and compute descriptive statistics for each sub-period.
# Download FF 6 portfolios (25 size/BM portfolios - we use the 2x3)
ff6_url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/6_Portfolios_2x3_CSV.zip"
temp6 <- tempfile()
download.file(ff6_url, temp6, mode = "wb")
unzip(temp6, exdir = tempdir())
ff6_file <- list.files(tempdir(), pattern = "6_Portfolios_2x3", full.names = TRUE)
ff6_file <- ff6_file[grepl("\\.CSV$", ff6_file, ignore.case = TRUE)]
ff6_raw <- read.csv(ff6_file[1], skip = 15, header = FALSE, stringsAsFactors = FALSE)
colnames(ff6_raw) <- c("Date", "SL", "SM", "SH", "BL", "BM", "BH")
# Keep monthly data only
ff6_raw <- ff6_raw[nchar(trimws(ff6_raw$Date)) == 6, ]
# Convert to numeric
for (col in c("SL", "SM", "SH", "BL", "BM", "BH")) {
ff6_raw[[col]] <- as.numeric(ff6_raw[[col]])
}
ff6_raw <- ff6_raw[complete.cases(ff6_raw), ]
# Convert to decimal
for (col in c("SL", "SM", "SH", "BL", "BM", "BH")) {
ff6_raw[[col]] <- ff6_raw[[col]] / 100
}
# Parse dates
ff6_raw$DateNum <- as.numeric(ff6_raw$Date)
ff6_raw$Year <- floor(ff6_raw$DateNum / 100)
ff6_raw$Month <- ff6_raw$DateNum %% 100
# Sub-period 1: Jan 1930 - Jun 1974
sp1 <- ff6_raw[ff6_raw$DateNum >= 193001 & ff6_raw$DateNum <= 197406, ]
# Sub-period 2: Jul 1974 - Dec 2018
sp2 <- ff6_raw[ff6_raw$DateNum >= 197407 & ff6_raw$DateNum <= 201812, ]
port_cols <- c("SL", "SM", "SH", "BL", "BM", "BH")
port_labels <- c("Small/Low", "Small/Mid", "Small/High",
"Big/Low", "Big/Mid", "Big/High")
# Compute statistics
compute_stats <- function(data, cols) {
data.frame(
Portfolio = port_labels,
Average = round(sapply(cols, function(c) mean(data[[c]])) * 100, 2),
SD = round(sapply(cols, function(c) sd(data[[c]])) * 100, 2),
Skewness = round(sapply(cols, function(c) {
x <- data[[c]]; n <- length(x)
(n / ((n-1)*(n-2))) * sum(((x - mean(x)) / sd(x))^3)
}), 2),
Kurtosis = round(sapply(cols, function(c) {
x <- data[[c]]; n <- length(x); m <- mean(x); s <- sd(x)
(n*(n+1)) / ((n-1)*(n-2)*(n-3)) * sum(((x-m)/s)^4) - 3*(n-1)^2/((n-2)*(n-3))
}), 2),
row.names = NULL
)
}
cat("Sub-period 1: January 1930 - June 1974\n")## Sub-period 1: January 1930 - June 1974
stats_sp1 <- compute_stats(sp1, port_cols)
kable(stats_sp1, caption = "Sub-period 1 (01/1930 - 06/1974)")| Portfolio | Average | SD | Skewness | Kurtosis |
|---|---|---|---|---|
| Small/Low | 24.31 | 76.47 | 5.89 | 42.69 |
| Small/Mid | 38.74 | 99.68 | 4.07 | 22.12 |
| Small/High | 41.94 | 106.76 | 4.09 | 22.50 |
| Big/Low | 105.01 | 240.19 | 3.31 | 11.31 |
| Big/Mid | 75.04 | 153.84 | 2.88 | 8.98 |
| Big/High | 47.61 | 126.36 | 4.85 | 27.70 |
##
## Sub-period 2: July 1974 - December 2018
stats_sp2 <- compute_stats(sp2, port_cols)
kable(stats_sp2, caption = "Sub-period 2 (07/1974 - 12/2018)")| Portfolio | Average | SD | Skewness | Kurtosis |
|---|---|---|---|---|
| Small/Low | 168.47 | 383.67 | 2.39 | 4.75 |
| Small/Mid | 170.78 | 375.02 | 2.12 | 3.16 |
| Small/High | 179.66 | 438.72 | 2.61 | 6.03 |
| Big/Low | 1371.21 | 4714.07 | 4.17 | 18.21 |
| Big/Mid | 980.79 | 3458.95 | 4.55 | 21.50 |
| Big/High | 856.62 | 3268.42 | 5.01 | 26.74 |
Conclusion: The distributions are not stable across the two periods:
These differences can be attributed to the greater frequency of systematic economic shocks and subsequent government intervention during the earlier period.
Problem: Consider two clients. Client 1 has \(E(r_C) = 8\%\), \(r_f = 5\%\), \(E(r_P) = 11\%\), \(\sigma_P = 15\%\).
Solution:
\(0.08 = 0.05 + y \times (0.11 - 0.05) \Rightarrow y = \frac{0.08 - 0.05}{0.11 - 0.05} = 0.50\)
So the client invests 50% in the risky portfolio.
\(\sigma_C = y \times \sigma_P = 0.50 \times 15\% = 7.5\%\)
The first client is more risk averse, preferring investments that have less risk as evidenced by the lower standard deviation.
Problem: Johnson requests the portfolio standard deviation to equal one half the market portfolio standard deviation. The market portfolio \(\sigma_M = 20\%\), which implies \(\sigma_P = 10\%\). The intercept of the CML equals \(r_f = 0.05\) and the slope of the CML equals the Sharpe ratio for the market portfolio (35%).
Solution:
Using the CML:
\[E(r_P) = r_f + \frac{E(r_M) - r_f}{\sigma_M} \times \sigma_P = 0.05 + 0.35 \times 0.10 = 0.085 = 8.5\%\]
Problem: Which indifference curve represents the optimal portfolio?
Answer: Indifference curve 2 because it is tangent to the CAL. This tangency point represents the highest level of utility achievable given the investment opportunity set defined by the CAL.
Problem: Which point on the graph represents the optimal portfolio?
Answer: Point E. This is the point where the highest indifference curve is tangent to the Capital Allocation Line.
Problem: An equity fund has an expected return equal to the T-bill rate plus a risk premium. The client invests 60% in the equity fund and 40% in T-bills.
Solution:
Problem: Even though it seems that gold is dominated by stocks, can gold still be an attractive asset?
Solution:
Even though gold appears dominated by stocks (lower expected return with comparable or higher standard deviation), gold might still be an attractive asset to hold as part of a portfolio. If the correlation between gold and stocks is sufficiently low, gold will be held as a component in a portfolio — specifically, the optimal tangency portfolio. The diversification benefit from the low correlation can offset gold’s inferior stand-alone risk-return profile.
If the correlation between gold and stocks equals +1, then no one would hold gold. The optimal CAL would be composed of bills and stocks only. Since the set of risk/return combinations of stocks and gold would plot as a straight line with a negative slope, these combinations would be dominated by the stock portfolio. Of course, this situation could not persist — if no one desired gold, its price would fall and its expected rate of return would increase until it became sufficiently attractive to include in a portfolio.
Problem: Since Stock A and Stock B are perfectly negatively correlated (\(\rho = -1\)), a risk-free portfolio can be created.
Solution:
With perfect negative correlation, the portfolio standard deviation is:
\[\sigma_P = |w_A \sigma_A - w_B \sigma_B|\]
Setting \(\sigma_P = 0\):
\[0 = 5 \times w_A - 10 \times (1 - w_A) \Rightarrow w_A = 0.6667\]
The expected rate of return for this risk-free portfolio is:
\[E(r) = (0.6667 \times 10\%) + (0.3333 \times 15\%) = 11.667\%\]
Therefore, the risk-free rate is 11.667%.
Problem: Portfolio analysis regarding adding a new stock (ABC) or government securities to an existing portfolio.
Solution:
a. Adding ABC stock (subscript OP = original portfolio, ABC = new stock, NP = new portfolio):
b. Adding Government Securities (subscript GS):
c. Adding risk-free government securities results in a lower beta for the new portfolio.
d. The comment is not correct. Although the respective standard deviations and expected returns for two securities may be equal, the covariances between each security and the original portfolio are unknown, making it impossible to draw the conclusion stated.
e.
Problem: Given four stocks with the following characteristics, construct the optimal risky portfolio using the Treynor-Black technique.
| Stock | Expected Return | Beta | Residual SD |
|---|---|---|---|
| A | 20% | 1.3 | 58% |
| B | 18% | 1.8 | 71% |
| C | 17% | 0.7 | 60% |
| D | 12% | 1.0 | 55% |
Market: \(E(r_M) = 16\%\), \(\sigma_M = 23\%\), \(r_f = 8\%\)
Solution:
a. Compute alphas:
| Stock | \(\alpha_i = r_i - [r_f + \beta_i(r_M - r_f)]\) | Expected excess return |
|---|---|---|
| A | \(20\% - [8\% + 1.3 \times 8\%] = 1.6\%\) | 12% |
| B | \(18\% - [8\% + 1.8 \times 8\%] = -4.4\%\) | 10% |
| C | \(17\% - [8\% + 0.7 \times 8\%] = 3.4\%\) | 9% |
| D | \(12\% - [8\% + 1.0 \times 8\%] = -4.0\%\) | 4% |
Stocks A and C have positive alphas, whereas stocks B and D have negative alphas.
Residual variances: \(\sigma^2(e_A) = 3364\), \(\sigma^2(e_B) = 5041\), \(\sigma^2(e_C) = 3600\), \(\sigma^2(e_D) = 3025\)
b. Using the Treynor-Black technique:
| Stock | \(\alpha/\sigma^2(e)\) | Weight in active portfolio |
|---|---|---|
| A | 0.000476 | -0.6142 |
| B | -0.000873 | 1.1265 |
| C | 0.000944 | -1.2181 |
| D | -0.001322 | 1.7058 |
Active portfolio: \(\alpha_A = -16.90\%\), \(\beta_A = 2.08\), \(\sigma(e_A) = 147.68\%\)
Position in active portfolio: \(w_0 = \frac{-0.1690/21809.6}{0.08/23^2} = -0.05124\)
Adjusted for beta: \(w^* = \frac{-0.05124}{1 + (1 - 2.08)(-0.05124)} = -0.0486\)
c. Information ratio: \(A = -16.90/147.68 = -0.1144\), \(A^2 = 0.0131\)
Sharpe ratio: \(S^2 = S_M^2 + A^2 = (8/23)^2 + 0.0131 = 0.1341\), \(S = 0.3662\)
d. Market Sharpe: \(S_M = 8/23 = 0.3478\). Improvement: 0.0184 — modest due to large residual variance.
e. Final portfolio composition: Bills 43.15%, M 59.61%, A 1.70%, B -3.11%, C 3.37%, D -4.71%.
Problem: Discussion of regression results for stocks ABC and XYZ and their effects on portfolio diversification.
Solution:
The regression results provide quantitative measures of return and risk based on monthly returns over the five-year period.
ABC: \(\beta = 0.60\) (low systematic risk), \(\alpha = -3.2\%\), \(\sigma(e) = 13.02\%\), \(R^2 = 0.35\).
XYZ: \(\beta = 0.97\) (average systematic risk), \(\alpha = 7.3\%\) (positive, large), \(\sigma(e) = 21.45\%\), \(R^2 = 0.17\).
Key observations:
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: Asia/Shanghai
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_4.0.2 knitr_1.51
## [3] quadprog_1.5-8 PerformanceAnalytics_2.0.8
## [5] quantmod_0.4.28 TTR_0.24.4
## [7] xts_0.14.1 zoo_1.8-15
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 jsonlite_2.0.0 dplyr_1.2.0 compiler_4.4.1
## [5] tidyselect_1.2.1 jquerylib_0.1.4 scales_1.4.0 yaml_2.3.10
## [9] fastmap_1.2.0 lattice_0.22-6 R6_2.6.1 generics_0.1.3
## [13] curl_7.0.0 tibble_3.3.1 bslib_0.10.0 pillar_1.10.2
## [17] RColorBrewer_1.1-3 rlang_1.1.7 cachem_1.1.0 xfun_0.52
## [21] sass_0.4.10 S7_0.2.0 otel_0.2.0 cli_3.6.5
## [25] withr_3.0.2 magrittr_2.0.3 digest_0.6.37 grid_4.4.1
## [29] rstudioapi_0.17.1 lifecycle_1.0.5 vctrs_0.7.1 evaluate_1.0.5
## [33] glue_1.8.0 farver_2.1.2 rmarkdown_2.31 pkgconfig_2.0.3
## [37] tools_4.4.1 htmltools_0.5.8.1