knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(PerformanceAnalytics)
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
library(quadprog)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
##
## first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(knitr)
library(xts)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
assets <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
getSymbols(assets, src = "yahoo", from = "2010-01-01", to = "2025-12-31", auto.assign = TRUE)
## [1] "SPY" "QQQ" "EEM" "IWM" "EFA" "TLT" "IYR" "GLD"
# Merge adjusted closes using a more compact apply loop
price_data <- do.call(merge, lapply(assets, function(sym) Ad(get(sym))))
names(price_data) <- assets
cat("Dimensions of daily prices:", dim(price_data), "\n")
## Dimensions of daily prices: 4023 8
head(price_data)
## SPY QQQ EEM IWM EFA TLT IYR
## 2010-01-04 84.79638 40.29079 30.35151 51.36656 35.12844 56.13517 26.76811
## 2010-01-05 85.02086 40.29079 30.57180 51.18993 35.15940 56.49770 26.83238
## 2010-01-06 85.08073 40.04776 30.63576 51.14176 35.30801 55.74142 26.82070
## 2010-01-07 85.43984 40.07379 30.45811 51.51910 35.17179 55.83514 27.06027
## 2010-01-08 85.72417 40.40363 30.69972 51.80009 35.45043 55.81015 26.87913
## 2010-01-11 85.84388 40.23870 30.63576 51.59137 35.74147 55.50389 27.00769
## GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
# Aggregate to end of month and calculate discrete returns
monthly_px <- to.monthly(price_data, indexAt = "lastof", OHLC = FALSE)
rtn_monthly <- na.omit(Return.calculate(monthly_px, method = "discrete"))
cat("Dimensions of monthly returns:", dim(rtn_monthly), "\n")
## Dimensions of monthly returns: 191 8
head(rtn_monthly)
## SPY QQQ EEM IWM EFA
## 2010-02-28 0.03119499 0.04603859 0.017763914 0.04475161 0.002667268
## 2010-03-31 0.06087965 0.07710950 0.081108572 0.08230654 0.063854445
## 2010-04-30 0.01546998 0.02242527 -0.001662004 0.05678484 -0.028045771
## 2010-05-31 -0.07945464 -0.07392410 -0.093935459 -0.07536603 -0.111928161
## 2010-06-30 -0.05174092 -0.05975651 -0.013986497 -0.07743441 -0.020619544
## 2010-07-31 0.06830037 0.07258303 0.109324702 0.06730919 0.116104081
## TLT IYR GLD
## 2010-02-28 -0.003424632 0.05457025 0.032748219
## 2010-03-31 -0.020573687 0.09748509 -0.004386396
## 2010-04-30 0.033217963 0.06388056 0.058834363
## 2010-05-31 0.051083805 -0.05683518 0.030513147
## 2010-06-30 0.057979269 -0.04670091 0.023553189
## 2010-07-31 -0.009463398 0.09404812 -0.050871157
ff_url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
temp_z <- tempfile()
download.file(ff_url, temp_z, quiet = TRUE)
# Extract and isolate the specific CSV
csv_path <- unzip(temp_z, exdir = tempdir())
ff_file <- csv_path[grepl("Factors.*csv$", csv_path, ignore.case = TRUE)][1]
if (is.na(ff_file)) stop("Fama-French CSV not located.")
# Parse text to skip headers and footers dynamically
raw_text <- readLines(ff_file)
idx_start <- which(grepl("^\\s*[0-9]{6}", raw_text))[1]
idx_end <- which(grepl("^\\s*$", raw_text[idx_start:length(raw_text)]))[1] + idx_start - 2
ff3 <- read.csv(ff_file, skip = idx_start - 1, nrows = idx_end - idx_start + 1, header = FALSE)
names(ff3) <- c("Date", "Mkt_RF", "SMB", "HML", "RF")
# Format dates to EOM using lubridate and scale to decimals
ff3$Date <- ceiling_date(as.Date(paste0(ff3$Date, "01"), "%Y%m%d"), "month") - days(1)
ff3[, 2:5] <- ff3[, 2:5] / 100
ff_xts <- xts(ff3[, -1], order.by = ff3$Date)
cat("FF3 Data Dimensions:", dim(ff_xts), "\n")
## FF3 Data Dimensions: 1196 4
tail(ff_xts, 5)
## Mkt_RF SMB HML RF
## 2025-10-31 0.0196 -0.0055 -0.0310 0.0037
## 2025-11-30 -0.0013 0.0038 0.0376 0.0030
## 2025-12-31 -0.0036 -0.0106 0.0242 0.0034
## 2026-01-31 0.0102 0.0220 0.0372 0.0030
## 2026-02-28 -0.0117 0.0014 0.0283 0.0028
# Standardize index dates using lubridate for a clean inner join
index(rtn_monthly) <- ceiling_date(index(rtn_monthly), "month") - days(1)
df_merged <- na.omit(merge(rtn_monthly, ff_xts, join = "inner"))
cat("Merged Data Dimensions:", dim(df_merged), "\n")
## Merged Data Dimensions: 191 12
cat("Period:", format(min(index(df_merged))), "to", format(max(index(df_merged))), "\n")
## Period: 2010-02-28 to 2025-12-31
head(df_merged)
## SPY QQQ EEM IWM EFA
## 2010-02-28 0.03119499 0.04603859 0.017763914 0.04475161 0.002667268
## 2010-03-31 0.06087965 0.07710950 0.081108572 0.08230654 0.063854445
## 2010-04-30 0.01546998 0.02242527 -0.001662004 0.05678484 -0.028045771
## 2010-05-31 -0.07945464 -0.07392410 -0.093935459 -0.07536603 -0.111928161
## 2010-06-30 -0.05174092 -0.05975651 -0.013986497 -0.07743441 -0.020619544
## 2010-07-31 0.06830037 0.07258303 0.109324702 0.06730919 0.116104081
## TLT IYR GLD Mkt_RF SMB HML RF
## 2010-02-28 -0.003424632 0.05457025 0.032748219 0.0339 0.0118 0.0318 0e+00
## 2010-03-31 -0.020573687 0.09748509 -0.004386396 0.0630 0.0146 0.0219 1e-04
## 2010-04-30 0.033217963 0.06388056 0.058834363 0.0199 0.0484 0.0296 1e-04
## 2010-05-31 0.051083805 -0.05683518 0.030513147 -0.0790 0.0013 -0.0248 1e-04
## 2010-06-30 0.057979269 -0.04670091 0.023553189 -0.0556 -0.0179 -0.0473 1e-04
## 2010-07-31 -0.009463398 0.09404812 -0.050871157 0.0692 0.0022 -0.0050 1e-04
# Define 60-month trailing window
win_capm <- df_merged["2020-03/2025-02"]
# Extract components as matrices/vectors
Y <- as.matrix(coredata(win_capm[, assets]))
X_mkt <- as.numeric(coredata(win_capm$Mkt_RF))
R_f <- as.numeric(coredata(win_capm$RF))
# Compute excess returns
Y_excess <- sweep(Y, 1, R_f, "-")
# Calculate betas and residuals using an apply loop
beta_vec <- apply(Y_excess, 2, function(y) lm(y ~ X_mkt)$coef[2])
err_matrix <- Y_excess - outer(X_mkt, beta_vec)
# Construct Covariance Matrix (Systematic + Idiosyncratic)
Cov_CAPM <- (beta_vec %o% beta_vec) * var(X_mkt) + diag(apply(err_matrix, 2, var))
cat("\nCAPM Covariance Matrix (3x3 Sample):\n")
##
## CAPM Covariance Matrix (3x3 Sample):
print(round(Cov_CAPM[1:3, 1:3], 6))
## SPY QQQ EEM
## SPY 0.002623 0.002884 0.001888
## QQQ 0.002884 0.003791 0.002102
## EEM 0.001888 0.002102 0.002712
# MVP Optimization Configuration
n_assets <- length(assets)
A_mat <- cbind(1, diag(n_assets))
b_vec <- c(1, rep(0, n_assets))
sol_capm <- solve.QP(Dmat = 2 * Cov_CAPM, dvec = rep(0, n_assets),
Amat = A_mat, bvec = b_vec, meq = 1)
w_capm <- setNames(sol_capm$solution, assets)
cat("\nOptimal Weights (CAPM):\n")
##
## Optimal Weights (CAPM):
print(round(w_capm, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.0000 0.0000 0.1401 0.0000 0.0838 0.3425 0.0000 0.4336
er_capm <- colMeans(Y) %*% w_capm
vol_capm <- sqrt(t(w_capm) %*% Cov_CAPM %*% w_capm)
cat("\nExpected Return:", round(er_capm, 5))
##
## Expected Return: 0.00393
cat("\nPortfolio Risk (Std Dev):", round(vol_capm, 5), "\n")
##
## Portfolio Risk (Std Dev): 0.02984
# Define FF3 independent variables
X_ff3 <- cbind(X_mkt,
as.numeric(coredata(win_capm$SMB)),
as.numeric(coredata(win_capm$HML)))
# Regress to find factor loadings (Betas) and residuals using sapply
B_mat <- t(sapply(1:n_assets, function(i) lm(Y_excess[, i] ~ X_ff3)$coef[-1]))
err_ff3 <- sapply(1:n_assets, function(i) residuals(lm(Y_excess[, i] ~ X_ff3)))
# Construct Multi-Factor Covariance Matrix
Cov_FF3 <- B_mat %*% cov(X_ff3) %*% t(B_mat) + diag(apply(err_ff3, 2, var))
cat("FF3 Covariance Matrix (3x3 Sample):\n")
## FF3 Covariance Matrix (3x3 Sample):
print(round(Cov_FF3[1:3, 1:3], 6))
## [,1] [,2] [,3]
## [1,] 0.002623 0.002885 0.001881
## [2,] 0.002885 0.003791 0.001964
## [3,] 0.001881 0.001964 0.002712
# Optimize FF3 MVP
sol_ff3 <- solve.QP(Dmat = 2 * Cov_FF3, dvec = rep(0, n_assets),
Amat = A_mat, bvec = b_vec, meq = 1)
w_ff3 <- setNames(sol_ff3$solution, assets)
cat("\nOptimal Weights (FF3):\n")
##
## Optimal Weights (FF3):
print(round(w_ff3, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.0000 0.0000 0.1565 0.0000 0.0821 0.3391 0.0000 0.4223
er_ff3 <- colMeans(Y) %*% w_ff3
vol_ff3 <- sqrt(t(w_ff3) %*% Cov_FF3 %*% w_ff3)
cat("\nExpected Return:", round(er_ff3, 5))
##
## Expected Return: 0.00388
cat("\nPortfolio Risk (Std Dev):", round(vol_ff3, 5), "\n")
##
## Portfolio Risk (Std Dev): 0.02974
actuals_mar <- as.numeric(rtn_monthly["2025-03", assets])
names(actuals_mar) <- assets
cat("March 2025 Returns:\n")
## March 2025 Returns:
print(round(actuals_mar, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## -0.0557 -0.0759 0.0113 -0.0685 0.0018 -0.0120 -0.0234 0.0945
perf_capm_mar <- sum(w_capm * actuals_mar)
perf_ff3_mar <- sum(w_ff3 * actuals_mar)
kable(data.frame(
Method = c("CAPM Model", "Fama-French 3"),
Realized_Return = round(c(perf_capm_mar, perf_ff3_mar) * 100, 3)
), col.names = c("Optimization Method", "Realized Return (%)"),
caption = "March 2025 Out-of-Sample Performance")
March 2025 Out-of-Sample Performance
| CAPM Model |
3.858 |
| Fama-French 3 |
3.773 |
# Shift window forward by 1 month
win_apr <- df_merged["2020-04/2025-03"]
Y_apr <- as.matrix(coredata(win_apr[, assets]))
X_mkt_apr <- as.numeric(coredata(win_apr$Mkt_RF))
R_f_apr <- as.numeric(coredata(win_apr$RF))
Y_exc_apr <- sweep(Y_apr, 1, R_f_apr, "-")
# -- CAPM Roll --
b_capm_apr <- apply(Y_exc_apr, 2, function(y) lm(y ~ X_mkt_apr)$coef[2])
e_capm_apr <- Y_exc_apr - outer(X_mkt_apr, b_capm_apr)
Cov_C_apr <- (b_capm_apr %o% b_capm_apr) * var(X_mkt_apr) + diag(apply(e_capm_apr, 2, var))
w_capm_apr <- setNames(solve.QP(2 * Cov_C_apr, rep(0, n_assets), A_mat, b_vec, meq=1)$solution, assets)
# -- FF3 Roll --
X_ff3_apr <- cbind(X_mkt_apr, as.numeric(coredata(win_apr$SMB)), as.numeric(coredata(win_apr$HML)))
B_ff3_apr <- t(sapply(1:n_assets, function(i) lm(Y_exc_apr[, i] ~ X_ff3_apr)$coef[-1]))
e_ff3_apr <- sapply(1:n_assets, function(i) residuals(lm(Y_exc_apr[, i] ~ X_ff3_apr)))
Cov_F_apr <- B_ff3_apr %*% cov(X_ff3_apr) %*% t(B_ff3_apr) + diag(apply(e_ff3_apr, 2, var))
w_ff3_apr <- setNames(solve.QP(2 * Cov_F_apr, rep(0, n_assets), A_mat, b_vec, meq=1)$solution, assets)
# -- April 2025 Performance --
actuals_apr <- as.numeric(rtn_monthly["2025-04", assets])
perf_capm_apr <- sum(w_capm_apr * actuals_apr)
perf_ff3_apr <- sum(w_ff3_apr * actuals_apr)
kable(data.frame(
Method = c("CAPM Model", "Fama-French 3"),
Window = "2020/04 - 2025/03",
Realized_Return = round(c(perf_capm_apr, perf_ff3_apr) * 100, 3)
), col.names = c("Optimization Method", "Training Window", "Realized Return (%)"),
caption = "April 2025 Out-of-Sample Performance")
April 2025 Out-of-Sample Performance
| CAPM Model |
2020/04 - 2025/03 |
2.184 |
| Fama-French 3 |
2020/04 - 2025/03 |
2.133 |