knitr::opts_chunk$set(echo = TRUE)
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ──────────────────────── tidyquant 1.0.9 ──
## ✔ PerformanceAnalytics 2.0.4      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.26     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(timetk)
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG
library(purrr)
library(quadprog)
library(readr)
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:xts':
## 
##     first, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")

etf_prices <- tq_get(tickers, from = "2010-01-01", to = "2024-03-31", get = "stock.prices")
monthly_returns <- etf_prices %>%
  group_by(symbol) %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = "monthly",
               type = "log") %>%
  pivot_wider(names_from = symbol, values_from = monthly.returns)

weekly_returns <- etf_prices %>%
  group_by(symbol) %>%
  tq_transmute(select = adjusted,
               mutate_fun = periodReturn,
               period = "weekly",
               type = "log") %>%
  pivot_wider(names_from = symbol, values_from = weekly.returns)
# Read raw data, skipping the first 3 lines (description header)
ff_raw <- read_csv("F-F_Research_Data_Factors.CSV", skip = 3, col_names = FALSE, show_col_types = FALSE)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
# Inspect first few rows
head(ff_raw, 10)
## # A tibble: 10 × 5
##    X1     X2     X3    X4    X5   
##    <chr>  <chr>  <chr> <chr> <chr>
##  1 <NA>   Mkt-RF SMB   HML   RF   
##  2 192607 2.96   -2.56 -2.43 0.22 
##  3 192608 2.64   -1.17 3.82  0.25 
##  4 192609 0.36   -1.40 0.13  0.23 
##  5 192610 -3.24  -0.09 0.70  0.32 
##  6 192611 2.53   -0.10 -0.51 0.31 
##  7 192612 2.62   -0.03 -0.05 0.28 
##  8 192701 -0.06  -0.37 4.54  0.25 
##  9 192702 4.18   0.04  2.94  0.26 
## 10 192703 0.13   -1.65 -2.61 0.30
# Rename columns manually
colnames(ff_raw) <- c("Date", "MKT_RF", "SMB", "HML", "RF")

# Remove any rows that are not proper dates (e.g. contain letters or empty)
ff_clean <- ff_raw %>%
  filter(!is.na(Date), grepl("^[0-9]{6}$", Date)) %>%
  mutate(
    Date = as.Date(paste0(substr(Date, 1, 4), "-", substr(Date, 5, 6), "-01")),
    across(-Date, as.numeric) / 100  # convert from percent to decimal
  )
returns_merged <- monthly_returns %>%
  rename_with(tolower) %>%
  rename(date = date) %>%
  left_join(ff_clean, by = c("date" = "Date"))
# Step 1: Get 60 months of returns
ret_60m <- monthly_returns %>%
  filter(date >= as.Date("2019-03-01") & date <= as.Date("2024-02-29")) %>%
  select(-date)

# Step 2: Convert to numeric matrix (important fix!)
ret_60m_mat <- as.matrix(ret_60m)
ret_60m_mat <- apply(ret_60m_mat, 2, as.numeric)  # just in case any character slipped in

# Step 3: Compute covariance matrix and weights
cov_matrix <- cov(ret_60m_mat, use = "complete.obs")
inv_cov <- solve(cov_matrix)
ones <- rep(1, ncol(ret_60m_mat))

weights_capm <- inv_cov %*% ones / as.numeric(t(ones) %*% inv_cov %*% ones)
rownames(weights_capm) <- colnames(ret_60m_mat)

# Step 4: Compute CAPM MVP returns
mvp_capm_returns <- as.numeric(ret_60m_mat %*% weights_capm)

# Step 5: Get corresponding dates
dates_60m <- monthly_returns %>%
  filter(date >= as.Date("2019-03-01") & date <= as.Date("2024-02-29")) %>%
  pull(date)

capm_df <- data.frame(date = dates_60m, return = mvp_capm_returns)
# Step 1: Filter the date range and compute excess returns
excess_returns <- returns_merged %>%
  filter(date >= as.Date("2019-03-01") & date <= as.Date("2024-02-29")) %>%
  mutate(across(spy:gld, ~ . - RF)) %>%
  select(date, spy:gld)

# Step 2: Drop rows with NAs
excess_returns_clean <- excess_returns %>%
  drop_na()

# Step 3: Compute covariance matrix
cov_matrix_ff <- cov(excess_returns_clean %>% select(-date))

# Step 4: Solve for MVP weights
inv_cov_ff <- solve(cov_matrix_ff)
ones <- rep(1, ncol(excess_returns_clean) - 1)

weights_ff <- inv_cov_ff %*% ones / as.numeric(t(ones) %*% inv_cov_ff %*% ones)
rownames(weights_ff) <- colnames(excess_returns_clean %>% select(-date))

# Step 5: Convert to numeric matrix before matrix multiplication
ret_matrix_ff <- excess_returns_clean %>%
  select(-date) %>%
  mutate(across(everything(), as.numeric)) %>%
  as.matrix()

# Now compute MVP returns
mvp_ff_returns <- as.numeric(ret_matrix_ff %*% weights_ff)

# Create final dataframe
ff_df <- data.frame(date = excess_returns_clean$date, return = mvp_ff_returns)
# Get March 2024 data
march_data <- monthly_returns %>%
  filter(date == as.Date("2024-03-01")) %>%
  select(-date)

# Convert march_data to a numeric vector (assumes all columns are numeric)
march_data_numeric <- as.numeric(unlist(march_data))

# Ensure weights_capm is a numeric vector
weights_capm_numeric <- as.numeric(weights_capm)

# Realized return using CAPM weights
realized_capm <- sum(march_data_numeric * weights_capm_numeric)

# Realized return using FF weights
realized_ff <- sum(march_data_numeric * weights_ff)

# Print
realized_df <- data.frame(
  Model = c("CAPM", "Fama-French 3-Factor"),
  Return_March2024 = c(realized_capm, realized_ff)
)
realized_df
##                  Model Return_March2024
## 1                 CAPM                0
## 2 Fama-French 3-Factor                0