knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(PerformanceAnalytics)
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(quadprog)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(knitr)
library(xts)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
assets <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")

getSymbols(assets, src = "yahoo", from = "2010-01-01", to = "2025-12-31", auto.assign = TRUE)
## [1] "SPY" "QQQ" "EEM" "IWM" "EFA" "TLT" "IYR" "GLD"
# Merge adjusted closes using a more compact apply loop
price_data <- do.call(merge, lapply(assets, function(sym) Ad(get(sym))))
names(price_data) <- assets

cat("Dimensions of daily prices:", dim(price_data), "\n")
## Dimensions of daily prices: 4023 8
head(price_data)
##                 SPY      QQQ      EEM      IWM      EFA      TLT      IYR
## 2010-01-04 84.79638 40.29079 30.35151 51.36656 35.12844 56.13517 26.76811
## 2010-01-05 85.02086 40.29079 30.57180 51.18993 35.15940 56.49770 26.83238
## 2010-01-06 85.08073 40.04776 30.63576 51.14176 35.30801 55.74142 26.82070
## 2010-01-07 85.43984 40.07379 30.45811 51.51910 35.17179 55.83514 27.06027
## 2010-01-08 85.72417 40.40363 30.69972 51.80009 35.45043 55.81015 26.87913
## 2010-01-11 85.84388 40.23870 30.63576 51.59137 35.74147 55.50389 27.00769
##               GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
# Aggregate to end of month and calculate discrete returns
monthly_px  <- to.monthly(price_data, indexAt = "lastof", OHLC = FALSE)
rtn_monthly <- na.omit(Return.calculate(monthly_px, method = "discrete"))

cat("Dimensions of monthly returns:", dim(rtn_monthly), "\n")
## Dimensions of monthly returns: 191 8
head(rtn_monthly)
##                    SPY         QQQ          EEM         IWM          EFA
## 2010-02-28  0.03119499  0.04603859  0.017763914  0.04475161  0.002667268
## 2010-03-31  0.06087965  0.07710950  0.081108572  0.08230654  0.063854445
## 2010-04-30  0.01546998  0.02242527 -0.001662004  0.05678484 -0.028045771
## 2010-05-31 -0.07945464 -0.07392410 -0.093935459 -0.07536603 -0.111928161
## 2010-06-30 -0.05174092 -0.05975651 -0.013986497 -0.07743441 -0.020619544
## 2010-07-31  0.06830037  0.07258303  0.109324702  0.06730919  0.116104081
##                     TLT         IYR          GLD
## 2010-02-28 -0.003424632  0.05457025  0.032748219
## 2010-03-31 -0.020573687  0.09748509 -0.004386396
## 2010-04-30  0.033217963  0.06388056  0.058834363
## 2010-05-31  0.051083805 -0.05683518  0.030513147
## 2010-06-30  0.057979269 -0.04670091  0.023553189
## 2010-07-31 -0.009463398  0.09404812 -0.050871157
ff_url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
temp_z <- tempfile()
download.file(ff_url, temp_z, quiet = TRUE)

# Extract and isolate the specific CSV
csv_path <- unzip(temp_z, exdir = tempdir())
ff_file  <- csv_path[grepl("Factors.*csv$", csv_path, ignore.case = TRUE)][1]

if (is.na(ff_file)) stop("Fama-French CSV not located.")

# Parse text to skip headers and footers dynamically
raw_text  <- readLines(ff_file)
idx_start <- which(grepl("^\\s*[0-9]{6}", raw_text))[1]
idx_end   <- which(grepl("^\\s*$", raw_text[idx_start:length(raw_text)]))[1] + idx_start - 2

ff3 <- read.csv(ff_file, skip = idx_start - 1, nrows = idx_end - idx_start + 1, header = FALSE)
names(ff3) <- c("Date", "Mkt_RF", "SMB", "HML", "RF")

# Format dates to EOM using lubridate and scale to decimals
ff3$Date <- ceiling_date(as.Date(paste0(ff3$Date, "01"), "%Y%m%d"), "month") - days(1)
ff3[, 2:5] <- ff3[, 2:5] / 100

ff_xts <- xts(ff3[, -1], order.by = ff3$Date)

cat("FF3 Data Dimensions:", dim(ff_xts), "\n")
## FF3 Data Dimensions: 1196 4
tail(ff_xts, 5)
##             Mkt_RF     SMB     HML     RF
## 2025-10-31  0.0196 -0.0055 -0.0310 0.0037
## 2025-11-30 -0.0013  0.0038  0.0376 0.0030
## 2025-12-31 -0.0036 -0.0106  0.0242 0.0034
## 2026-01-31  0.0102  0.0220  0.0372 0.0030
## 2026-02-28 -0.0117  0.0014  0.0283 0.0028
# Standardize index dates using lubridate for a clean inner join
index(rtn_monthly) <- ceiling_date(index(rtn_monthly), "month") - days(1)

df_merged <- na.omit(merge(rtn_monthly, ff_xts, join = "inner"))

cat("Merged Data Dimensions:", dim(df_merged), "\n")
## Merged Data Dimensions: 191 12
cat("Period:", format(min(index(df_merged))), "to", format(max(index(df_merged))), "\n")
## Period: 2010-02-28 to 2025-12-31
head(df_merged)
##                    SPY         QQQ          EEM         IWM          EFA
## 2010-02-28  0.03119499  0.04603859  0.017763914  0.04475161  0.002667268
## 2010-03-31  0.06087965  0.07710950  0.081108572  0.08230654  0.063854445
## 2010-04-30  0.01546998  0.02242527 -0.001662004  0.05678484 -0.028045771
## 2010-05-31 -0.07945464 -0.07392410 -0.093935459 -0.07536603 -0.111928161
## 2010-06-30 -0.05174092 -0.05975651 -0.013986497 -0.07743441 -0.020619544
## 2010-07-31  0.06830037  0.07258303  0.109324702  0.06730919  0.116104081
##                     TLT         IYR          GLD  Mkt_RF     SMB     HML    RF
## 2010-02-28 -0.003424632  0.05457025  0.032748219  0.0339  0.0118  0.0318 0e+00
## 2010-03-31 -0.020573687  0.09748509 -0.004386396  0.0630  0.0146  0.0219 1e-04
## 2010-04-30  0.033217963  0.06388056  0.058834363  0.0199  0.0484  0.0296 1e-04
## 2010-05-31  0.051083805 -0.05683518  0.030513147 -0.0790  0.0013 -0.0248 1e-04
## 2010-06-30  0.057979269 -0.04670091  0.023553189 -0.0556 -0.0179 -0.0473 1e-04
## 2010-07-31 -0.009463398  0.09404812 -0.050871157  0.0692  0.0022 -0.0050 1e-04
# Define 60-month trailing window
win_capm <- df_merged["2020-03/2025-02"]

# Extract components as matrices/vectors
Y     <- as.matrix(coredata(win_capm[, assets]))
X_mkt <- as.numeric(coredata(win_capm$Mkt_RF))
R_f   <- as.numeric(coredata(win_capm$RF))

# Compute excess returns
Y_excess <- sweep(Y, 1, R_f, "-")

# Calculate betas and residuals using an apply loop
beta_vec   <- apply(Y_excess, 2, function(y) lm(y ~ X_mkt)$coef[2])
err_matrix <- Y_excess - outer(X_mkt, beta_vec)

# Construct Covariance Matrix (Systematic + Idiosyncratic)
Cov_CAPM <- (beta_vec %o% beta_vec) * var(X_mkt) + diag(apply(err_matrix, 2, var))

cat("\nCAPM Covariance Matrix (3x3 Sample):\n")
## 
## CAPM Covariance Matrix (3x3 Sample):
print(round(Cov_CAPM[1:3, 1:3], 6))
##          SPY      QQQ      EEM
## SPY 0.002623 0.002884 0.001888
## QQQ 0.002884 0.003791 0.002102
## EEM 0.001888 0.002102 0.002712
# MVP Optimization Configuration
n_assets <- length(assets)
A_mat    <- cbind(1, diag(n_assets))
b_vec    <- c(1, rep(0, n_assets))

sol_capm <- solve.QP(Dmat = 2 * Cov_CAPM, dvec = rep(0, n_assets), 
                     Amat = A_mat, bvec = b_vec, meq = 1)
w_capm   <- setNames(sol_capm$solution, assets)

cat("\nOptimal Weights (CAPM):\n")
## 
## Optimal Weights (CAPM):
print(round(w_capm, 4))
##    SPY    QQQ    EEM    IWM    EFA    TLT    IYR    GLD 
## 0.0000 0.0000 0.1401 0.0000 0.0838 0.3425 0.0000 0.4336
er_capm  <- colMeans(Y) %*% w_capm
vol_capm <- sqrt(t(w_capm) %*% Cov_CAPM %*% w_capm)

cat("\nExpected Return:", round(er_capm, 5))
## 
## Expected Return: 0.00393
cat("\nPortfolio Risk (Std Dev):", round(vol_capm, 5), "\n")
## 
## Portfolio Risk (Std Dev): 0.02984
# Define FF3 independent variables
X_ff3 <- cbind(X_mkt, 
               as.numeric(coredata(win_capm$SMB)), 
               as.numeric(coredata(win_capm$HML)))

# Regress to find factor loadings (Betas) and residuals using sapply
B_mat   <- t(sapply(1:n_assets, function(i) lm(Y_excess[, i] ~ X_ff3)$coef[-1]))
err_ff3 <- sapply(1:n_assets, function(i) residuals(lm(Y_excess[, i] ~ X_ff3)))

# Construct Multi-Factor Covariance Matrix
Cov_FF3 <- B_mat %*% cov(X_ff3) %*% t(B_mat) + diag(apply(err_ff3, 2, var))

cat("FF3 Covariance Matrix (3x3 Sample):\n")
## FF3 Covariance Matrix (3x3 Sample):
print(round(Cov_FF3[1:3, 1:3], 6))
##          [,1]     [,2]     [,3]
## [1,] 0.002623 0.002885 0.001881
## [2,] 0.002885 0.003791 0.001964
## [3,] 0.001881 0.001964 0.002712
# Optimize FF3 MVP
sol_ff3 <- solve.QP(Dmat = 2 * Cov_FF3, dvec = rep(0, n_assets), 
                    Amat = A_mat, bvec = b_vec, meq = 1)
w_ff3   <- setNames(sol_ff3$solution, assets)

cat("\nOptimal Weights (FF3):\n")
## 
## Optimal Weights (FF3):
print(round(w_ff3, 4))
##    SPY    QQQ    EEM    IWM    EFA    TLT    IYR    GLD 
## 0.0000 0.0000 0.1565 0.0000 0.0821 0.3391 0.0000 0.4223
er_ff3  <- colMeans(Y) %*% w_ff3
vol_ff3 <- sqrt(t(w_ff3) %*% Cov_FF3 %*% w_ff3)

cat("\nExpected Return:", round(er_ff3, 5))
## 
## Expected Return: 0.00388
cat("\nPortfolio Risk (Std Dev):", round(vol_ff3, 5), "\n")
## 
## Portfolio Risk (Std Dev): 0.02974
actuals_mar <- as.numeric(rtn_monthly["2025-03", assets])
names(actuals_mar) <- assets

cat("March 2025 Returns:\n")
## March 2025 Returns:
print(round(actuals_mar, 4))
##     SPY     QQQ     EEM     IWM     EFA     TLT     IYR     GLD 
## -0.0557 -0.0759  0.0113 -0.0685  0.0018 -0.0120 -0.0234  0.0945
perf_capm_mar <- sum(w_capm * actuals_mar)
perf_ff3_mar  <- sum(w_ff3 * actuals_mar)

kable(data.frame(
  Method = c("CAPM Model", "Fama-French 3"),
  Realized_Return = round(c(perf_capm_mar, perf_ff3_mar) * 100, 3)
), col.names = c("Optimization Method", "Realized Return (%)"),
   caption = "March 2025 Out-of-Sample Performance")
March 2025 Out-of-Sample Performance
Optimization Method Realized Return (%)
CAPM Model 3.858
Fama-French 3 3.773
# Shift window forward by 1 month
win_apr <- df_merged["2020-04/2025-03"]

Y_apr     <- as.matrix(coredata(win_apr[, assets]))
X_mkt_apr <- as.numeric(coredata(win_apr$Mkt_RF))
R_f_apr   <- as.numeric(coredata(win_apr$RF))
Y_exc_apr <- sweep(Y_apr, 1, R_f_apr, "-")

# -- CAPM Roll --
b_capm_apr <- apply(Y_exc_apr, 2, function(y) lm(y ~ X_mkt_apr)$coef[2])
e_capm_apr <- Y_exc_apr - outer(X_mkt_apr, b_capm_apr)
Cov_C_apr  <- (b_capm_apr %o% b_capm_apr) * var(X_mkt_apr) + diag(apply(e_capm_apr, 2, var))

w_capm_apr <- setNames(solve.QP(2 * Cov_C_apr, rep(0, n_assets), A_mat, b_vec, meq=1)$solution, assets)

# -- FF3 Roll --
X_ff3_apr <- cbind(X_mkt_apr, as.numeric(coredata(win_apr$SMB)), as.numeric(coredata(win_apr$HML)))
B_ff3_apr <- t(sapply(1:n_assets, function(i) lm(Y_exc_apr[, i] ~ X_ff3_apr)$coef[-1]))
e_ff3_apr <- sapply(1:n_assets, function(i) residuals(lm(Y_exc_apr[, i] ~ X_ff3_apr)))
Cov_F_apr <- B_ff3_apr %*% cov(X_ff3_apr) %*% t(B_ff3_apr) + diag(apply(e_ff3_apr, 2, var))

w_ff3_apr <- setNames(solve.QP(2 * Cov_F_apr, rep(0, n_assets), A_mat, b_vec, meq=1)$solution, assets)

# -- April 2025 Performance --
actuals_apr <- as.numeric(rtn_monthly["2025-04", assets])

perf_capm_apr <- sum(w_capm_apr * actuals_apr)
perf_ff3_apr  <- sum(w_ff3_apr * actuals_apr)

kable(data.frame(
  Method = c("CAPM Model", "Fama-French 3"),
  Window = "2020/04 - 2025/03",
  Realized_Return = round(c(perf_capm_apr, perf_ff3_apr) * 100, 3)
), col.names = c("Optimization Method", "Training Window", "Realized Return (%)"),
   caption = "April 2025 Out-of-Sample Performance")
April 2025 Out-of-Sample Performance
Optimization Method Training Window Realized Return (%)
CAPM Model 2020/04 - 2025/03 2.184
Fama-French 3 2020/04 - 2025/03 2.133