knitr::opts_chunk$set(echo = TRUE)
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.9 ──
## ✔ PerformanceAnalytics 2.0.4 ✔ TTR 0.24.4
## ✔ quantmod 0.4.26 ✔ xts 0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
library(purrr)
library(quadprog)
library(readr)
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:xts':
##
## first, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
etf_prices <- tq_get(tickers, from = "2010-01-01", to = "2024-03-31", get = "stock.prices")
monthly_returns <- etf_prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "monthly",
type = "log") %>%
pivot_wider(names_from = symbol, values_from = monthly.returns)
weekly_returns <- etf_prices %>%
group_by(symbol) %>%
tq_transmute(select = adjusted,
mutate_fun = periodReturn,
period = "weekly",
type = "log") %>%
pivot_wider(names_from = symbol, values_from = weekly.returns)
# Read raw data, skipping the first 3 lines (description header)
ff_raw <- read_csv("F-F_Research_Data_Factors.CSV", skip = 3, col_names = FALSE, show_col_types = FALSE)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
# Inspect first few rows
head(ff_raw, 10)
## # A tibble: 10 × 5
## X1 X2 X3 X4 X5
## <chr> <chr> <chr> <chr> <chr>
## 1 <NA> Mkt-RF SMB HML RF
## 2 192607 2.96 -2.56 -2.43 0.22
## 3 192608 2.64 -1.17 3.82 0.25
## 4 192609 0.36 -1.40 0.13 0.23
## 5 192610 -3.24 -0.09 0.70 0.32
## 6 192611 2.53 -0.10 -0.51 0.31
## 7 192612 2.62 -0.03 -0.05 0.28
## 8 192701 -0.06 -0.37 4.54 0.25
## 9 192702 4.18 0.04 2.94 0.26
## 10 192703 0.13 -1.65 -2.61 0.30
# Rename columns manually
colnames(ff_raw) <- c("Date", "MKT_RF", "SMB", "HML", "RF")
# Remove any rows that are not proper dates (e.g. contain letters or empty)
ff_clean <- ff_raw %>%
filter(!is.na(Date), grepl("^[0-9]{6}$", Date)) %>%
mutate(
Date = as.Date(paste0(substr(Date, 1, 4), "-", substr(Date, 5, 6), "-01")),
across(-Date, as.numeric) / 100 # convert from percent to decimal
)
returns_merged <- monthly_returns %>%
rename_with(tolower) %>%
rename(date = date) %>%
left_join(ff_clean, by = c("date" = "Date"))
# Step 1: Get 60 months of returns
ret_60m <- monthly_returns %>%
filter(date >= as.Date("2019-03-01") & date <= as.Date("2024-02-29")) %>%
select(-date)
# Step 2: Convert to numeric matrix (important fix!)
ret_60m_mat <- as.matrix(ret_60m)
ret_60m_mat <- apply(ret_60m_mat, 2, as.numeric) # just in case any character slipped in
# Step 3: Compute covariance matrix and weights
cov_matrix <- cov(ret_60m_mat, use = "complete.obs")
inv_cov <- solve(cov_matrix)
ones <- rep(1, ncol(ret_60m_mat))
weights_capm <- inv_cov %*% ones / as.numeric(t(ones) %*% inv_cov %*% ones)
rownames(weights_capm) <- colnames(ret_60m_mat)
# Step 4: Compute CAPM MVP returns
mvp_capm_returns <- as.numeric(ret_60m_mat %*% weights_capm)
# Step 5: Get corresponding dates
dates_60m <- monthly_returns %>%
filter(date >= as.Date("2019-03-01") & date <= as.Date("2024-02-29")) %>%
pull(date)
capm_df <- data.frame(date = dates_60m, return = mvp_capm_returns)
# Step 1: Filter the date range and compute excess returns
excess_returns <- returns_merged %>%
filter(date >= as.Date("2019-03-01") & date <= as.Date("2024-02-29")) %>%
mutate(across(spy:gld, ~ . - RF)) %>%
select(date, spy:gld)
# Step 2: Drop rows with NAs
excess_returns_clean <- excess_returns %>%
drop_na()
# Step 3: Compute covariance matrix
cov_matrix_ff <- cov(excess_returns_clean %>% select(-date))
# Step 4: Solve for MVP weights
inv_cov_ff <- solve(cov_matrix_ff)
ones <- rep(1, ncol(excess_returns_clean) - 1)
weights_ff <- inv_cov_ff %*% ones / as.numeric(t(ones) %*% inv_cov_ff %*% ones)
rownames(weights_ff) <- colnames(excess_returns_clean %>% select(-date))
# Step 5: Convert to numeric matrix before matrix multiplication
ret_matrix_ff <- excess_returns_clean %>%
select(-date) %>%
mutate(across(everything(), as.numeric)) %>%
as.matrix()
# Now compute MVP returns
mvp_ff_returns <- as.numeric(ret_matrix_ff %*% weights_ff)
# Create final dataframe
ff_df <- data.frame(date = excess_returns_clean$date, return = mvp_ff_returns)
# Get March 2024 data
march_data <- monthly_returns %>%
filter(date == as.Date("2024-03-01")) %>%
select(-date)
# Convert march_data to a numeric vector (assumes all columns are numeric)
march_data_numeric <- as.numeric(unlist(march_data))
# Ensure weights_capm is a numeric vector
weights_capm_numeric <- as.numeric(weights_capm)
# Realized return using CAPM weights
realized_capm <- sum(march_data_numeric * weights_capm_numeric)
# Realized return using FF weights
realized_ff <- sum(march_data_numeric * weights_ff)
# Print
realized_df <- data.frame(
Model = c("CAPM", "Fama-French 3-Factor"),
Return_March2024 = c(realized_capm, realized_ff)
)
realized_df
## Model Return_March2024
## 1 CAPM 0
## 2 Fama-French 3-Factor 0