#1. Import ETF data
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tidyquant)
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:xts':
##
## first, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(xts)
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
etf_prices <- tq_get(tickers, from = "2010-01-01", to = Sys.Date(), get = "stock.prices")
#2. Calculate weekly and monthly simple returns
monthly_returns <- etf_prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "monthly",
type = "arithmetic",
col_rename = "monthly_return")
weekly_returns <- etf_prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "weekly",
type = "arithmetic",
col_rename = "weekly_return")
#3. Convert monthly returns into tibble format
monthly_wide <- monthly_returns %>%
pivot_wider(names_from = symbol, values_from = monthly_return) %>%
rename(date = date) %>%
as_tibble()
#4. Download Fama-French 3-Factor data and convert to digits
ff_raw <- read.csv("F-F_Research_Data_Factors-3.csv", skip = 3)
ff_data <- ff_raw %>%
rename(date = X) %>%
filter(!is.na(RF)) %>%
mutate(date = as.Date(paste0(substr(date, 1, 4), "-", substr(date, 5, 6), "-01")),
MKT = as.numeric(Mkt.RF)/100,
SMB = as.numeric(SMB)/100,
HML = as.numeric(HML)/100,
RF = as.numeric(RF)/100) %>%
select(date, MKT, SMB, HML, RF)
## Warning: There were 4 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `MKT = as.numeric(Mkt.RF)/100`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 3 remaining warnings.
#5. Merge monthly returns with Fama-French data
merged_data <- left_join(monthly_wide, ff_data, by = "date")
#6. CAPM Covariance Matrix (2010/02–2015/01)
capm_data <- merged_data %>%
filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-01"))
# Excess returns = ETF returns - RF
excess_returns <- capm_data %>%
mutate(across(SPY:GLD, ~ . - RF, .names = "excess_{col}")) %>%
select(date, starts_with("excess_"))
# Remove NAs and convert to matrix
excess_mat <- excess_returns %>%
select(-date) %>%
na.omit() %>%
as.matrix()
# Covariance matrix
cov_capm <- cov(excess_mat)
#7. FF3 Covariance Matrix (2010/02–2015/01)
library(broom)
library(dplyr)
library(tidyr)
library(purrr)
# Step 1: Subset only the needed columns
ff3_excess <- capm_data %>%
mutate(across(SPY:GLD, ~ . - RF, .names = "excess_{.col}")) %>%
select(date, starts_with("excess_"), MKT, SMB, HML)
# Step 2: Pivot longer for regression
long_data <- ff3_excess %>%
pivot_longer(cols = starts_with("excess_"),
names_to = "asset",
values_to = "excess_return") %>%
drop_na(excess_return, MKT, SMB, HML) # Remove rows with any NA
# Step 3: Run FF3 regression per asset
residuals_df <- long_data %>%
group_by(asset) %>%
group_modify(~ {
if (nrow(.x) >= 10) { # Require minimum data to avoid empty lm
fit <- lm(excess_return ~ MKT + SMB + HML, data = .x)
tibble(date = .x$date, resid = resid(fit))
} else {
tibble(date = .x$date, resid = NA) # Skip if not enough data
}
}) %>%
pivot_wider(names_from = asset, values_from = resid)
# Step 4: Final residual matrix for covariance
resid_mat <- residuals_df %>%
select(-date) %>%
na.omit() %>%
as.matrix()
cov_ff3 <- cov(resid_mat)
#8. Global Minimum Variance (GMV) Portfolio Weights
# Round monthly_wide$date to the first of the month
monthly_wide <- monthly_wide %>%
mutate(date = as.Date(format(date, "%Y-%m-01")))
# Already done in ff_data:
# mutate(date = as.Date(paste0(substr(date, 1, 4), "-", substr(date, 5, 6), "-01")))
merged_data <- left_join(monthly_wide, ff_data, by = "date")
capm_data <- merged_data %>%
filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-01"))
summary(capm_data)
## date SPY QQQ EEM
## Min. :2010-02-01 Min. :-0.07945 Min. :-0.07392 Min. :-0.178947
## 1st Qu.:2011-04-23 1st Qu.:-0.01175 1st Qu.:-0.01182 1st Qu.:-0.026297
## Median :2012-07-16 Median : 0.01993 Median : 0.02390 Median :-0.001049
## Mean :2012-07-16 Mean : 0.01277 Mean : 0.01629 Mean : 0.003595
## 3rd Qu.:2013-10-08 3rd Qu.: 0.03555 3rd Qu.: 0.04642 3rd Qu.: 0.031060
## Max. :2015-01-01 Max. : 0.10915 Max. : 0.13173 Max. : 0.162678
## IWM EFA TLT IYR
## Min. :-0.11150 Min. :-0.111928 Min. :-0.067609 Min. :-0.10668
## 1st Qu.:-0.01888 1st Qu.:-0.022490 1st Qu.:-0.021487 1st Qu.:-0.01100
## Median : 0.02452 Median : 0.004836 Median : 0.005879 Median : 0.02457
## Mean : 0.01354 Mean : 0.006348 Mean : 0.010279 Mean : 0.01468
## 3rd Qu.: 0.04792 3rd Qu.: 0.037477 3rd Qu.: 0.033471 3rd Qu.: 0.04500
## Max. : 0.15101 Max. : 0.116104 Max. : 0.132061 Max. : 0.13190
## GLD MKT SMB
## Min. :-0.110623 Min. :-0.07900 Min. :-0.0418000
## 1st Qu.:-0.030766 1st Qu.:-0.01218 1st Qu.:-0.0126750
## Median : 0.004372 Median : 0.01990 Median : 0.0025500
## Mean : 0.003976 Mean : 0.01298 Mean : 0.0009483
## 3rd Qu.: 0.046968 3rd Qu.: 0.03795 3rd Qu.: 0.0134750
## Max. : 0.122749 Max. : 0.11330 Max. : 0.0484000
## HML RF
## Min. :-0.047300 Min. :0.000e+00
## 1st Qu.:-0.014850 1st Qu.:0.000e+00
## Median :-0.001500 Median :0.000e+00
## Mean :-0.001153 Mean :3.333e-05
## 3rd Qu.: 0.011825 3rd Qu.:1.000e-04
## Max. : 0.049000 Max. :1.000e-04
# 1. Ensure dates are aligned (if not already)
monthly_wide <- monthly_wide %>%
mutate(date = as.Date(format(date, "%Y-%m-01")))
# 2. Merge again to be safe
merged_data <- left_join(monthly_wide, ff_data, by = "date")
# 3. Filter for the period 2010-02 to 2015-01
capm_data <- merged_data %>%
filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-01"))
# 4. Calculate excess returns: ETF returns - RF
excess_returns <- capm_data %>%
mutate(across(SPY:GLD, ~ . - RF, .names = "excess_{col}")) %>%
select(date, starts_with("excess_"))
# 5. Remove NAs and ensure all columns have variance
excess_df_raw <- excess_returns %>% select(-date)
# Remove columns with all NA or zero variance
non_empty_cols <- colnames(excess_df_raw)[
colSums(is.na(excess_df_raw)) < nrow(excess_df_raw) &
apply(excess_df_raw, 2, function(x) sd(x, na.rm = TRUE) > 0)
]
# Final cleaned excess return matrix
excess_mat_clean <- excess_df_raw[, non_empty_cols] %>%
na.omit() %>%
as.matrix()
# 6. Compute and clean the covariance matrix
library(Matrix)
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
cov_capm <- cov(excess_mat_clean)
cov_capm_fixed <- as.matrix(nearPD(cov_capm)$mat)
# 7. GMV weight calculation function
library(quadprog)
get_gmv_weights <- function(cov_mat, ridge = 1e-8) {
cov_mat <- (cov_mat + t(cov_mat)) / 2 # Ensure symmetry
cov_mat <- cov_mat + diag(ncol(cov_mat)) * ridge
n <- ncol(cov_mat)
dvec <- rep(0, n)
Amat <- matrix(1, nrow = n, ncol = 1)
bvec <- 1
sol <- solve.QP(cov_mat, dvec, Amat, bvec, meq = 1)
w <- sol$solution
names(w) <- colnames(cov_mat)
return(w)
}
# 8. Run GMV optimization
gmv_capm_weights <- get_gmv_weights(cov_capm_fixed)
# 9. Print final weights
cat("✅ GMV Portfolio Weights (CAPM Model):\n")
## ✅ GMV Portfolio Weights (CAPM Model):
print(round(gmv_capm_weights, 4))
## excess_SPY excess_QQQ excess_EEM excess_IWM excess_EFA excess_TLT excess_IYR
## 0.9681 -0.1908 -0.0148 -0.0663 -0.0149 0.4704 -0.2045
## excess_GLD
## 0.0528
#9. Backtest GMV Portfolio Performance
library(xts)
# Convert your monthly_wide data frame (with a date column and asset returns columns) into xts:
# Make sure your date column is a Date class (or convert it)
monthly_wide$date <- as.Date(monthly_wide$date)
# Select all asset columns except date
rets_mat <- monthly_wide %>% select(-date)
# Create xts object
returns_xts <- xts(rets_mat, order.by = monthly_wide$date)
library(dplyr)
library(tidyr)
library(broom)
library(xts)
library(PerformanceAnalytics)
# Your get_gmv_weights function assumed loaded here
backtest_gmv <- function(returns_xts, ff_data, merged_data, window = 60, model = c("capm", "ff3")) {
model <- match.arg(model)
dates <- index(returns_xts)
weights_list <- list()
port_returns <- c()
for (i in seq(window + 1, nrow(returns_xts))) {
rets_window <- returns_xts[(i - window):(i - 1), ]
# Get risk-free rate window for excess returns
rf_window <- ff_data %>%
filter(date %in% dates[(i - window):(i - 1)]) %>%
pull(RF)
excess_window <- sweep(rets_window, 1, rf_window, "-")
if (model == "capm") {
cov_mat <- cov(na.omit(excess_window))
} else if (model == "ff3") {
date_slice <- dates[(i - window):(i - 1)]
# Prepare merged data for FF3 residual calculation
tmp_data <- merged_data %>%
filter(date %in% date_slice) %>%
mutate(across(SPY:GLD, ~ . - RF, .names = "excess_{col}"))
# Convert to long format for each asset's excess returns
long_df <- tmp_data %>%
select(date, starts_with("excess_"), MKT, SMB, HML) %>%
pivot_longer(cols = starts_with("excess_"), names_to = "asset", values_to = "excess_return") %>%
mutate(asset = sub("excess_", "", asset))
# Calculate residuals per asset, carefully handling missing data
residuals_df <- long_df %>%
group_by(asset) %>%
group_modify(function(.x, .y) {
data_complete <- na.omit(.x)
fit <- lm(excess_return ~ MKT + SMB + HML, data = data_complete)
# Use broom::augment on complete data and select residuals
resids <- broom::augment(fit, data = data_complete) %>%
select(.resid)
# Bind residuals with original dates for that asset
tibble(date = data_complete$date, resid = resids$.resid)
}) %>%
ungroup()
# Pivot residuals to wide matrix for covariance calculation
residuals_wide <- residuals_df %>%
pivot_wider(names_from = asset, values_from = resid) %>%
arrange(date)
# Remove date column and convert to matrix (rows = time, cols = assets)
residuals_mat <- residuals_wide %>%
select(-date) %>%
as.matrix()
# Covariance matrix of residuals (drop rows with NA)
cov_mat <- cov(na.omit(residuals_mat))
}
# Compute GMV weights
w <- get_gmv_weights(cov_mat)
# Calculate portfolio return at date i
r <- as.numeric(returns_xts[i, ]) %*% w
port_returns <- c(port_returns, r)
}
# Return xts object of portfolio returns
port_xts <- xts(port_returns, order.by = dates[(window + 1):nrow(returns_xts)])
return(port_xts)
}
# Assuming you have these objects loaded:
# returns_xts = your monthly returns xts object (assets as columns)
# ff_data = Fama-French factors data.frame with 'date' and 'RF' columns
# merged_data = data.frame including date, asset returns, RF, MKT, SMB, HML factors
# Run backtests
gmv_capm_backtest <- backtest_gmv(returns_xts, ff_data, merged_data, model = "capm")
## Warning in sweep(rets_window, 1, rf_window, "-"): STATS does not recycle
## exactly across MARGIN
gmv_ff3_backtest <- backtest_gmv(returns_xts, ff_data, merged_data, model = "ff3")
## Warning in sweep(rets_window, 1, rf_window, "-"): STATS does not recycle
## exactly across MARGIN
# Plot cumulative returns
combined <- merge(gmv_capm_backtest, gmv_ff3_backtest)
colnames(combined) <- c("CAPM GMV", "FF3 GMV")
charts.PerformanceSummary(combined, main = "Backtest: GMV Portfolios")