R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(tidyquant)
## Warning: package 'tidyquant' was built under R version 4.3.2
## Loading required package: lubridate
## Warning: package 'lubridate' was built under R version 4.3.2
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## Loading required package: PerformanceAnalytics
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
## Loading required package: quantmod
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(lubridate)
library(timetk)
library(purrr)
library(quantmod)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:xts':
## 
##     first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)

# Define the ETF symbols
symbols <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")

# Download the data from Yahoo Finance
prices <- getSymbols(symbols, src = 'yahoo', from = "2010-01-01", auto.assign = TRUE, warnings = FALSE) %>% 
  map(~ Ad(get(.))) %>% 
  reduce(merge) %>%
  `colnames<-`(symbols)

# Convert xts to tibble
prices_tibble <- tk_tbl(prices, rename_index = "date")

# Ensure date column is in character format, then convert to Date type
prices_tibble <- prices_tibble %>%
  mutate(date = as.character(date)) %>%
  mutate(date = as.Date(date))

# Calculate weekly log returns
weekly_prices <- prices_tibble %>%
  group_by(week = floor_date(date, "week")) %>%
  summarize(across(all_of(symbols), last))

weekly_returns <- weekly_prices %>%
  mutate(across(all_of(symbols), ~ log(. / lag(.)))) %>%
  drop_na()

# Calculate monthly log returns
monthly_prices <- prices_tibble %>%
  group_by(month = floor_date(date, "month")) %>%
  summarize(across(all_of(symbols), last))

monthly_returns <- monthly_prices %>%
  mutate(across(all_of(symbols), ~ log(. / lag(.)))) %>%
  drop_na()

# Convert to tibble format
monthly_returns_tibble <- as_tibble(monthly_returns)

# Rename 'month' column to 'date'
monthly_returns_tibble <- monthly_returns_tibble %>%
  rename(date = month)

# Load the Fama/French data from URL
FFdata <- read.csv("https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors.CSV", skip = 3)

# Clean and format the data, handling parsing errors
FFdata <- FFdata %>%
  mutate(date = parse_date_time(X, orders = "Y%m", quiet = TRUE)) %>%
  filter(!is.na(date)) %>%
  select(date, Mkt.RF, SMB, HML, RF) 

# Convert character columns to numeric
FFdata <- FFdata %>%
  mutate(across(-date, ~ as.numeric(gsub(" ", "", .))))

# Ensure date columns are present and in Date format
FFdata <- FFdata %>%
  mutate(date = as.Date(date))

# Merge the ETF returns with the FF factors data
merged_data <- left_join(monthly_returns_tibble, FFdata, by = 'date')

# Check the first few rows of the merged data
head(merged_data)
## # A tibble: 6 × 13
##   date           SPY     QQQ      EEM     IWM      EFA      TLT     IYR      GLD
##   <date>       <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>   <dbl>    <dbl>
## 1 2010-02-01  0.0307  0.0450  0.0176   0.0438  0.00266 -0.00343  0.0531  0.0322 
## 2 2010-03-01  0.0591  0.0743  0.0780   0.0791  0.0619  -0.0208   0.0930 -0.00440
## 3 2010-04-01  0.0154  0.0222 -0.00166  0.0552 -0.0284   0.0327   0.0619  0.0572 
## 4 2010-05-01 -0.0828 -0.0768 -0.0986  -0.0784 -0.119    0.0498  -0.0585  0.0301 
## 5 2010-06-01 -0.0531 -0.0616 -0.0141  -0.0806 -0.0208   0.0564  -0.0478  0.0233 
## 6 2010-07-01  0.0661  0.0701  0.104    0.0651  0.110   -0.00951  0.0899 -0.0522 
## # ℹ 4 more variables: Mkt.RF <dbl>, SMB <dbl>, HML <dbl>, RF <dbl>
# Extract the relevant returns for the CAPM model
capm_returns <- merged_data %>%
  filter(date >= "2010-02-01" & date <= "2015-01-31") %>%
  select(-date, -Mkt.RF, -SMB, -HML, -RF)

# Compute the covariance matrix
capm_cov_matrix <- cov(capm_returns)
print(capm_cov_matrix)
##               SPY           QQQ           EEM           IWM           EFA
## SPY  0.0013789570  0.0014370242  0.0017213326  0.0017634415  0.0015961631
## QQQ  0.0014370242  0.0017632708  0.0017694587  0.0018116955  0.0016582442
## EEM  0.0017213326  0.0017694587  0.0033901758  0.0023127713  0.0024955612
## IWM  0.0017634415  0.0018116955  0.0023127713  0.0026629388  0.0019648694
## EFA  0.0015961631  0.0016582442  0.0024955612  0.0019648694  0.0024254044
## TLT -0.0009663065 -0.0009584985 -0.0011821327 -0.0013080793 -0.0011039475
## IYR  0.0011816450  0.0012041823  0.0017966331  0.0015876832  0.0015471668
## GLD  0.0002043548  0.0003904106  0.0009310468  0.0005394826  0.0004417081
##               TLT           IYR          GLD
## SPY -0.0009663065  0.0011816450 0.0002043548
## QQQ -0.0009584985  0.0012041823 0.0003904106
## EEM -0.0011821327  0.0017966331 0.0009310468
## IWM -0.0013080793  0.0015876832 0.0005394826
## EFA -0.0011039475  0.0015471668 0.0004417081
## TLT  0.0015464579 -0.0003632174 0.0001797648
## IYR -0.0003632174  0.0019945686 0.0005353319
## GLD  0.0001797648  0.0005353319 0.0029025882
# Extract the relevant returns and FF factors for the FF model
ff_returns <- merged_data %>%
  filter(date >= "2010-02-01" & date <= "2015-01-31")

# Compute the residuals for each ETF by regressing on the FF factors
residuals <- ff_returns %>%
  select(all_of(symbols)) %>%
  map_dfc(~ residuals(lm(. ~ Mkt.RF + SMB + HML, data = ff_returns)))

# Compute the covariance matrix of the residuals
ff_cov_matrix <- cov(residuals)
print(ff_cov_matrix)
##               SPY           QQQ           EEM           IWM           EFA
## SPY  3.259780e-06  3.622019e-06  1.784334e-06  8.352221e-07  1.903219e-06
## QQQ  3.622019e-06  2.067163e-04 -5.274706e-05 -1.248137e-05 -3.610031e-05
## EEM  1.784334e-06 -5.274706e-05  1.213626e-03  4.639607e-05  5.052329e-04
## IWM  8.352221e-07 -1.248137e-05  4.639607e-05  1.656360e-05  3.183176e-05
## EFA  1.903219e-06 -3.610031e-05  5.052329e-04  3.183176e-05  5.327993e-04
## TLT  8.488184e-06 -4.926804e-06  1.964548e-05  2.785470e-05 -2.700833e-05
## IYR  2.136827e-05 -1.638326e-05  3.328178e-04  5.932484e-05  2.082017e-04
## GLD -8.117783e-06  5.139918e-05  5.668777e-04  5.843737e-05  1.954868e-04
##               TLT           IYR           GLD
## SPY  8.488184e-06  2.136827e-05 -8.117783e-06
## QQQ -4.926804e-06 -1.638326e-05  5.139918e-05
## EEM  1.964548e-05  3.328178e-04  5.668777e-04
## IWM  2.785470e-05  5.932484e-05  5.843737e-05
## EFA -2.700833e-05  2.082017e-04  1.954868e-04
## TLT  7.800174e-04  4.560251e-04  2.593804e-04
## IYR  4.560251e-04  1.008808e-03  3.079622e-04
## GLD  2.593804e-04  3.079622e-04  2.510801e-03
# Function to compute GMV portfolio weights
compute_gmv_weights <- function(cov_matrix) {
  inv_cov <- solve(cov_matrix)
  ones <- rep(1, nrow(inv_cov))
  weights <- inv_cov %*% ones / sum(inv_cov %*% ones)
  return(weights)
}

# Compute the weights
gmv_weights_capm <- compute_gmv_weights(capm_cov_matrix)
gmv_weights_ff <- compute_gmv_weights(ff_cov_matrix)

print(gmv_weights_capm)
##            [,1]
## SPY  0.94117719
## QQQ -0.18088147
## EEM -0.01436988
## IWM -0.05253550
## EFA -0.01342862
## TLT  0.48106074
## IYR -0.21174222
## GLD  0.05071976
print(gmv_weights_ff)
##              [,1]
## SPY  0.8299903675
## QQQ  0.0052288200
## EEM  0.0001159185
## IWM  0.1868598178
## EFA  0.0009316490
## TLT  0.0032724032
## IYR -0.0286069123
## GLD  0.0022079364

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.