library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.27 ✔ xts 0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
library(purrr)
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:xts':
##
## first, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(quantmod)
library(xts)
library(PerformanceAnalytics)
library(tidyr)
# 1. Import ETF data from Yahoo Finance
# Define ETF tickers
etf_tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
# Download data from 2010 to current date
start_date <- "2010-01-01"
end_date <- Sys.Date()
# Download adjusted closing prices
etf_prices <- tq_get(etf_tickers,
get = "stock.prices",
from = start_date,
to = end_date) %>%
select(symbol, date, adjusted) %>%
pivot_wider(names_from = symbol, values_from = adjusted) %>%
arrange(date)
# Convert to xts format for easier manipulation
etf_prices_xts <- etf_prices %>%
tk_xts(date_var = date)
## Warning: Non-numeric columns being dropped: date
# Display first few rows
head(etf_prices_xts)
## SPY QQQ EEM IWM EFA TLT IYR
## 2010-01-04 85.76846 40.48581 31.08410 51.92011 36.38438 58.25045 27.40290
## 2010-01-05 85.99547 40.48581 31.30972 51.74159 36.41644 58.62661 27.46870
## 2010-01-06 86.05605 40.24160 31.37522 51.69288 36.57037 57.84180 27.45673
## 2010-01-07 86.41929 40.26778 31.19326 52.07430 36.42927 57.93907 27.70199
## 2010-01-08 86.70689 40.59919 31.44072 52.35832 36.71787 57.91311 27.51655
## 2010-01-11 86.82799 40.43348 31.37522 52.14733 37.01933 57.59531 27.64814
## GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
# 2. Calculate weekly and monthly returns using simple returns
# Calculate daily returns first
etf_daily_returns <- etf_prices_xts %>%
Return.calculate(method = "simple") %>%
na.omit()
# Calculate weekly returns
etf_weekly_returns <- etf_daily_returns %>%
apply.weekly(Return.cumulative)
# Calculate monthly returns
etf_monthly_returns <- etf_daily_returns %>%
apply.monthly(Return.cumulative)
# Display first few rows of monthly returns
head(etf_monthly_returns)
## SPY QQQ EEM IWM EFA
## 2010-01-29 -0.05241329 -0.07819939 -0.103723018 -0.06048744 -0.074916549
## 2010-02-26 0.03119410 0.04603894 0.017763904 0.04475128 0.002667567
## 2010-03-31 0.06088018 0.07710947 0.081108957 0.08230697 0.063854332
## 2010-04-30 0.01546961 0.02242529 -0.001661721 0.05678456 -0.028045821
## 2010-05-28 -0.07945436 -0.07392372 -0.093935987 -0.07536629 -0.111928003
## 2010-06-30 -0.05174118 -0.05975684 -0.013986486 -0.07743419 -0.020619572
## TLT IYR GLD
## 2010-01-29 0.027835942 -0.05195402 -0.034972713
## 2010-02-26 -0.003424772 0.05457085 0.032748219
## 2010-03-31 -0.020573022 0.09748456 -0.004386396
## 2010-04-30 0.033217229 0.06388072 0.058834363
## 2010-05-28 0.051084090 -0.05683515 0.030513147
## 2010-06-30 0.057978643 -0.04670127 0.023553189
# 3. Convert monthly returns into tibble format
etf_monthly_tibble <- etf_monthly_returns %>%
tk_tbl(rename_index = "date") %>%
mutate(date = as.Date(date))
head(etf_monthly_tibble)
## # A tibble: 6 × 9
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-29 -0.0524 -0.0782 -0.104 -0.0605 -0.0749 0.0278 -0.0520 -0.0350
## 2 2010-02-26 0.0312 0.0460 0.0178 0.0448 0.00267 -0.00342 0.0546 0.0327
## 3 2010-03-31 0.0609 0.0771 0.0811 0.0823 0.0639 -0.0206 0.0975 -0.00439
## 4 2010-04-30 0.0155 0.0224 -0.00166 0.0568 -0.0280 0.0332 0.0639 0.0588
## 5 2010-05-28 -0.0795 -0.0739 -0.0939 -0.0754 -0.112 0.0511 -0.0568 0.0305
## 6 2010-06-30 -0.0517 -0.0598 -0.0140 -0.0774 -0.0206 0.0580 -0.0467 0.0236
# 4. Download Fama French 3 factors data
# Note: This requires manual download from Ken French's website
# For demonstration, I'll show how to process the data once downloaded
# Function to process Fama-French data (assuming CSV format)
process_ff_data <- function(file_path) {
# Read the CSV file (skip header rows as needed)
ff_data <- read.csv(file_path, skip = 3, stringsAsFactors = FALSE)
# Clean column names
colnames(ff_data) <- c("date", "Mkt.RF", "SMB", "HML", "RF")
# Convert date format (assuming YYYYMM format)
ff_data$date <- as.Date(paste0(ff_data$date, "01"), format = "%Y%m%d")
# Convert percentages to decimals
ff_data[, 2:5] <- ff_data[, 2:5] / 100
return(ff_data)
}
# For demonstration, create sample FF data
# In practice, download from: http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
set.seed(123)
sample_dates <- seq.Date(from = as.Date("2010-01-01"),
to = as.Date("2024-12-01"),
by = "month")
ff_factors <- tibble(
date = sample_dates,
Mkt.RF = rnorm(length(sample_dates), 0.008, 0.04),
SMB = rnorm(length(sample_dates), 0.002, 0.02),
HML = rnorm(length(sample_dates), 0.003, 0.025),
RF = rep(0.001, length(sample_dates)) # Risk-free rate
)
# 5. Merge monthly return data and FF factors
merged_data <- etf_monthly_tibble %>%
left_join(ff_factors, by = "date") %>%
na.omit()
head(merged_data)
## # A tibble: 0 × 13
## # ℹ 13 variables: date <date>, SPY <dbl>, QQQ <dbl>, EEM <dbl>, IWM <dbl>,
## # EFA <dbl>, TLT <dbl>, IYR <dbl>, GLD <dbl>, Mkt.RF <dbl>, SMB <dbl>,
## # HML <dbl>, RF <dbl>
# 6. CAPM model covariance matrix (2010/02 - 2015/01)
# Filter data for the specified period
analysis_period <- merged_data %>%
filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-31"))
# Calculate excess returns for CAPM
excess_returns_capm <- analysis_period %>%
select(all_of(etf_tickers)) %>%
sweep(1, analysis_period$RF, "-") # Subtract risk-free rate
# --- 6. CAPM Covariance Matrix (2010/02 - 2015/01, past 60 months) ---
# Function to calculate CAPM residual covariance matrix
calculate_capm_cov <- function(asset_returns, factors) {
# Get excess returns
excess_returns <- asset_returns - factors$RF
# Run regressions of each ETF on Mkt.RF to get residuals
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF)
resid(model)
})
# Compute covariance matrix of residuals
cov_matrix <- cov(residuals_matrix)
return(cov_matrix)
}
calculate_capm_cov <- function(data) {
# Remove rows with NA values
data <- na.omit(data)
# Separate ETF returns and factors
asset_returns <- data %>% select(all_of(etf_tickers))
factors <- data %>% select(Mkt.RF, RF)
# Get excess returns
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
# Run regressions of each ETF on Mkt.RF to get residuals
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF)
resid(model)
})
# Compute covariance matrix of residuals
cov_matrix <- cov(residuals_matrix)
return(cov_matrix)
}
# --- 7. Fama-French 3-Factor Covariance Matrix (2010/02 - 2015/01) ---
# Function to calculate FF3 residual covariance matrix
calculate_ff3_cov <- function(asset_returns, factors) {
# Get excess returns
excess_returns <- asset_returns - factors$RF
# Run regressions of each ETF on Mkt.RF + SMB + HML to get residuals
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(model)
})
# Compute covariance matrix of residuals
cov_matrix <- cov(residuals_matrix)
return(cov_matrix)
}
calculate_ff3_cov <- function(data) {
# Remove rows with NA values
data <- na.omit(data)
# Separate ETF returns and factors
asset_returns <- data %>% select(all_of(etf_tickers))
factors <- data %>% select(Mkt.RF, SMB, HML, RF)
# Get excess returns
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
# Run regressions of each ETF on Mkt.RF, SMB, HML to get residuals
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(model)
})
# Compute covariance matrix of residuals
cov_matrix <- cov(residuals_matrix)
return(cov_matrix)
}
# --- 8. Compute GMV Portfolio Weights ---
# Function to compute GMV weights given a covariance matrix
gmv_weights <- function(cov_matrix) {
inv_cov <- solve(cov_matrix)
ones <- rep(1, ncol(cov_matrix))
weights <- inv_cov %*% ones / as.numeric(t(ones) %*% inv_cov %*% ones)
return(as.vector(weights))
}
calculate_ff3_cov <- function(data) {
data <- na.omit(data)
asset_returns <- data %>% select(all_of(etf_tickers))
factors <- data %>% select(Mkt.RF, SMB, HML, RF)
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
residuals_matrix <- sapply(excess_returns, function(y) {
lm_model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(lm_model)
})
cov(residuals_matrix)
}
calculate_ff3_cov <- function(data) {
# Remove rows with any NA values in the relevant columns
data_clean <- na.omit(data)
asset_returns <- data_clean %>% select(all_of(etf_tickers))
factors <- data_clean %>% select(Mkt.RF, SMB, HML, RF)
# Compute excess returns
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
residuals_matrix <- sapply(excess_returns, function(y) {
# For each ETF, regress on FF factors
model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(model)
})
cov(residuals_matrix)
}
calculate_ff3_cov <- function(data) {
data_clean <- na.omit(data)
asset_returns <- data_clean %>% select(all_of(etf_tickers))
factors <- data_clean %>% select(Mkt.RF, SMB, HML, RF)
excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
residuals_matrix <- sapply(excess_returns, function(y) {
model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
resid(model)
})
cov(residuals_matrix)
}
print("GMV Weights - CAPM Model:")
## [1] "GMV Weights - CAPM Model:"
print("GMV Weights - FF3 Model:")
## [1] "GMV Weights - FF3 Model:"
# --- 9. Backtesting with Rolling Window ---
# Function to backtest GMV portfolios with rolling window estimation
backtest_gmv_portfolio <- function(returns_data, factor_data, model = "CAPM", window_size = 60) {
dates <- returns_data$date
n <- nrow(returns_data)
monthly_returns <- matrix(NA, n - window_size, 2) # Columns: CAPM, FF3
for (i in (window_size + 1):n) {
window_data <- returns_data[(i - window_size):(i - 1), ]
window_factors <- factor_data[(i - window_size):(i - 1), ]
if (model == "CAPM") {
cov_mat <- calculate_capm_cov(window_data %>% select(all_of(etf_tickers)),
window_factors %>% select(Mkt.RF, RF))
} else if (model == "FF3") {
cov_mat <- calculate_ff3_cov(window_data %>% select(all_of(etf_tickers)),
window_factors %>% select(Mkt.RF, SMB, HML, RF))
}
weights <- gmv_weights(cov_mat)
next_return <- returns_data[i, etf_tickers] %>% unlist()
monthly_returns[i - window_size, 1] <- sum(weights * next_return)
}
return(monthly_returns)
}
# Run backtests
returns_data <- merged_data
factor_data <- merged_data %>% select(date, Mkt.RF, SMB, HML, RF)
n_months <- nrow(returns_data)
# Create time series of portfolio returns
dates_backtest <- returns_data$date[61:n_months]