# ETF Portfolio Analysis with CAPM and Fama-French Models
# Load required libraries
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.27 ✔ xts 0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
library(purrr)
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:xts':
##
## first, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(quantmod)
library(xts)
library(PerformanceAnalytics)
library(tidyr)
# 1. Import ETF data from Yahoo Finance
# Define ETF tickers
etf_tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
# Download data from 2010 to current date
start_date <- "2010-01-01"
end_date <- Sys.Date()
# Download adjusted closing prices
etf_prices <- tq_get(etf_tickers,
get = "stock.prices",
from = start_date,
to = end_date) %>%
select(symbol, date, adjusted) %>%
pivot_wider(names_from = symbol, values_from = adjusted) %>%
arrange(date)
# Convert to xts format for easier manipulation
etf_prices_xts <- etf_prices %>%
tk_xts(date_var = date)
## Warning: Non-numeric columns being dropped: date
# Display first few rows
head(etf_prices_xts)
## SPY QQQ EEM IWM EFA TLT IYR
## 2010-01-04 85.76846 40.48581 31.08410 51.92011 36.38437 58.25044 27.40289
## 2010-01-05 85.99548 40.48581 31.30972 51.74157 36.41644 58.62659 27.46869
## 2010-01-06 86.05602 40.24160 31.37521 51.69289 36.57036 57.84182 27.45674
## 2010-01-07 86.41929 40.26776 31.19327 52.07429 36.42926 57.93911 27.70199
## 2010-01-08 86.70687 40.59920 31.44072 52.35831 36.71787 57.91314 27.51654
## 2010-01-11 86.82798 40.43350 31.37521 52.14734 37.01932 57.59533 27.64815
## GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
# 2. Calculate weekly and monthly returns using simple returns
# Calculate daily returns first
etf_daily_returns <- etf_prices_xts %>%
Return.calculate(method = "simple") %>%
na.omit()
# Calculate weekly returns
etf_weekly_returns <- etf_daily_returns %>%
apply.weekly(Return.cumulative)
# Calculate monthly returns
etf_monthly_returns <- etf_daily_returns %>%
apply.monthly(Return.cumulative)
# Display first few rows of monthly returns
head(etf_monthly_returns)
## SPY QQQ EEM IWM EFA
## 2010-01-29 -0.05241356 -0.07819892 -0.10372295 -0.06048773 -0.074916153
## 2010-02-26 0.03119477 0.04603872 0.01776404 0.04475129 0.002667793
## 2010-03-31 0.06087980 0.07710935 0.08110860 0.08230714 0.063853746
## 2010-04-30 0.01547004 0.02242547 -0.00166197 0.05678435 -0.028045402
## 2010-05-28 -0.07945442 -0.07392423 -0.09393583 -0.07536644 -0.111928307
## 2010-06-30 -0.05174134 -0.05975666 -0.01398656 -0.07743422 -0.020619328
## TLT IYR GLD
## 2010-01-29 0.02783621 -0.05195382 -0.034972713
## 2010-02-26 -0.00342509 0.05457033 0.032748219
## 2010-03-31 -0.02057354 0.09748502 -0.004386396
## 2010-04-30 0.03321954 0.06388079 0.058834363
## 2010-05-28 0.05108250 -0.05683532 0.030513147
## 2010-06-30 0.05797871 -0.04670096 0.023553189
# 3. Convert monthly returns into tibble format
etf_monthly_tibble <- etf_monthly_returns %>%
tk_tbl(rename_index = "date") %>%
mutate(date = as.Date(date))
head(etf_monthly_tibble)
## # A tibble: 6 × 9
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-29 -0.0524 -0.0782 -0.104 -0.0605 -0.0749 0.0278 -0.0520 -0.0350
## 2 2010-02-26 0.0312 0.0460 0.0178 0.0448 0.00267 -0.00343 0.0546 0.0327
## 3 2010-03-31 0.0609 0.0771 0.0811 0.0823 0.0639 -0.0206 0.0975 -0.00439
## 4 2010-04-30 0.0155 0.0224 -0.00166 0.0568 -0.0280 0.0332 0.0639 0.0588
## 5 2010-05-28 -0.0795 -0.0739 -0.0939 -0.0754 -0.112 0.0511 -0.0568 0.0305
## 6 2010-06-30 -0.0517 -0.0598 -0.0140 -0.0774 -0.0206 0.0580 -0.0467 0.0236
# 4. Download Fama French 3 factors data
# Note: This requires manual download from Ken French's website
# For demonstration, I'll show how to process the data once downloaded
# Function to process Fama-French data (assuming CSV format)
process_ff_data <- function(file_path) {
# Read the CSV file (skip header rows as needed)
ff_data <- read.csv(file_path, skip = 3, stringsAsFactors = FALSE)
# Clean column names
colnames(ff_data) <- c("date", "Mkt.RF", "SMB", "HML", "RF")
# Convert date format (assuming YYYYMM format)
ff_data$date <- as.Date(paste0(ff_data$date, "01"), format = "%Y%m%d")
# Convert percentages to decimals
ff_data[, 2:5] <- ff_data[, 2:5] / 100
return(ff_data)
}
# For demonstration, create sample FF data
# In practice, download from: http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
set.seed(123)
sample_dates <- seq.Date(from = as.Date("2010-01-01"),
to = as.Date("2024-12-01"),
by = "month")
ff_factors <- tibble(
date = sample_dates,
Mkt.RF = rnorm(length(sample_dates), 0.008, 0.04),
SMB = rnorm(length(sample_dates), 0.002, 0.02),
HML = rnorm(length(sample_dates), 0.003, 0.025),
RF = rep(0.001, length(sample_dates)) # Risk-free rate
)
# 5. Merge monthly return data and FF factors
merged_data <- etf_monthly_tibble %>%
left_join(ff_factors, by = "date") %>%
na.omit()
head(merged_data)
## # A tibble: 0 × 13
## # ℹ 13 variables: date <date>, SPY <dbl>, QQQ <dbl>, EEM <dbl>, IWM <dbl>,
## # EFA <dbl>, TLT <dbl>, IYR <dbl>, GLD <dbl>, Mkt.RF <dbl>, SMB <dbl>,
## # HML <dbl>, RF <dbl>
# 6. CAPM model covariance matrix (2010/02 - 2015/01)
# Filter data for the specified period
analysis_period <- merged_data %>%
filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-31"))
# Calculate excess returns for CAPM
excess_returns_capm <- analysis_period %>%
select(all_of(etf_tickers)) %>%
sweep(1, analysis_period$RF, "-") # Subtract risk-free rate
# --- 6. CAPM Covariance Matrix (2010/02 - 2015/01, past 60 months) ---
# Function to calculate CAPM residual covariance matrix
calculate_capm_cov <- function(asset_returns, factors) {
# Get excess returns
excess_returns <- asset_returns - factors$RF
# Run regressions of each ETF on Mkt.RF to get residuals
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF)
resid(model)
})
# Compute covariance matrix of residuals
cov_matrix <- cov(residuals_matrix)
return(cov_matrix)
}
calculate_capm_cov <- function(data) {
# Remove rows with NA values
data <- na.omit(data)
# Separate ETF returns and factors
asset_returns <- data %>% select(all_of(etf_tickers))
factors <- data %>% select(Mkt.RF, RF)
# Get excess returns
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
# Run regressions of each ETF on Mkt.RF to get residuals
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF)
resid(model)
})
# Compute covariance matrix of residuals
cov_matrix <- cov(residuals_matrix)
return(cov_matrix)
}
# --- 7. Fama-French 3-Factor Covariance Matrix (2010/02 - 2015/01) ---
# Function to calculate FF3 residual covariance matrix
calculate_ff3_cov <- function(asset_returns, factors) {
# Get excess returns
excess_returns <- asset_returns - factors$RF
# Run regressions of each ETF on Mkt.RF + SMB + HML to get residuals
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(model)
})
# Compute covariance matrix of residuals
cov_matrix <- cov(residuals_matrix)
return(cov_matrix)
}
calculate_ff3_cov <- function(data) {
# Remove rows with NA values
data <- na.omit(data)
# Separate ETF returns and factors
asset_returns <- data %>% select(all_of(etf_tickers))
factors <- data %>% select(Mkt.RF, SMB, HML, RF)
# Get excess returns
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
# Run regressions of each ETF on Mkt.RF, SMB, HML to get residuals
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(model)
})
# Compute covariance matrix of residuals
cov_matrix <- cov(residuals_matrix)
return(cov_matrix)
}
# --- 8. Compute GMV Portfolio Weights ---
# Function to compute GMV weights given a covariance matrix
gmv_weights <- function(cov_matrix) {
inv_cov <- solve(cov_matrix)
ones <- rep(1, ncol(cov_matrix))
weights <- inv_cov %*% ones / as.numeric(t(ones) %*% inv_cov %*% ones)
return(as.vector(weights))
}
calculate_ff3_cov <- function(data) {
data <- na.omit(data)
asset_returns <- data %>% select(all_of(etf_tickers))
factors <- data %>% select(Mkt.RF, SMB, HML, RF)
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
residuals_matrix <- sapply(excess_returns, function(y) {
lm_model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(lm_model)
})
cov(residuals_matrix)
}
calculate_ff3_cov <- function(data) {
# Remove rows with any NA values in the relevant columns
data_clean <- na.omit(data)
asset_returns <- data_clean %>% select(all_of(etf_tickers))
factors <- data_clean %>% select(Mkt.RF, SMB, HML, RF)
# Compute excess returns
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
residuals_matrix <- sapply(excess_returns, function(y) {
# For each ETF, regress on FF factors
model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(model)
})
cov(residuals_matrix)
}
calculate_ff3_cov <- function(data) {
data_clean <- na.omit(data)
asset_returns <- data_clean %>% select(all_of(etf_tickers))
factors <- data_clean %>% select(Mkt.RF, SMB, HML, RF)
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(model)
})
cov(residuals_matrix)
}
print("GMV Weights - CAPM Model:")
## [1] "GMV Weights - CAPM Model:"
print("GMV Weights - FF3 Model:")
## [1] "GMV Weights - FF3 Model:"
# --- 9. Backtesting with Rolling Window ---
# Function to backtest GMV portfolios with rolling window estimation
backtest_gmv_portfolio <- function(returns_data, factor_data, model = "CAPM", window_size = 60) {
dates <- returns_data$date
n <- nrow(returns_data)
monthly_returns <- matrix(NA, n - window_size, 2) # Columns: CAPM, FF3
for (i in (window_size + 1):n) {
window_data <- returns_data[(i - window_size):(i - 1), ]
window_factors <- factor_data[(i - window_size):(i - 1), ]
if (model == "CAPM") {
cov_mat <- calculate_capm_cov(window_data %>% select(all_of(etf_tickers)),
window_factors %>% select(Mkt.RF, RF))
} else if (model == "FF3") {
cov_mat <- calculate_ff3_cov(window_data %>% select(all_of(etf_tickers)),
window_factors %>% select(Mkt.RF, SMB, HML, RF))
}
weights <- gmv_weights(cov_mat)
next_return <- returns_data[i, etf_tickers] %>% unlist()
monthly_returns[i - window_size, 1] <- sum(weights * next_return)
}
return(monthly_returns)
}
# Run backtests
returns_data <- merged_data
factor_data <- merged_data %>% select(date, Mkt.RF, SMB, HML, RF)
n_months <- nrow(returns_data)
# Create time series of portfolio returns
dates_backtest <- returns_data$date[61:n_months]