library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.27     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(timetk)
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG
library(purrr)
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:xts':
## 
##     first, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(fBasics)
## 
## Attaching package: 'fBasics'
## 
## The following objects are masked from 'package:PerformanceAnalytics':
## 
##     kurtosis, skewness
## 
## The following object is masked from 'package:TTR':
## 
##     volatility
library(ggplot2)
library(zoo)
symbols <- c("SPY","QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD") 
portfolioPrices <- NULL
for (symbol in symbols) {
  portfolioPrices <- cbind(portfolioPrices, 
                           getSymbols.yahoo(symbol,
                                            from = '2010-01-01',
                                            to = Sys.Date(),
                                            auto.assign = FALSE)[,6])
}

colnames(portfolioPrices) <- symbols

head(portfolioPrices)
##                 SPY      QQQ      EEM      IWM      EFA      TLT      IYR
## 2010-01-04 85.76845 40.48581 31.08409 51.92012 36.38437 58.25041 27.40290
## 2010-01-05 85.99548 40.48581 31.30971 51.74157 36.41644 58.62657 27.46869
## 2010-01-06 86.05599 40.24162 31.37522 51.69290 36.57037 57.84177 27.45673
## 2010-01-07 86.41930 40.26778 31.19327 52.07430 36.42927 57.93904 27.70198
## 2010-01-08 86.70689 40.59919 31.44071 52.35833 36.71788 57.91314 27.51654
## 2010-01-11 86.82797 40.43348 31.37522 52.14734 37.01932 57.59533 27.64814
##               GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
###2
#Calculate weekly return
prices_weekly <- to.weekly(portfolioPrices, indexAt = "last", OHLC = FALSE)
asset_returns_wk_xts <- na.omit(Return.calculate(prices_weekly, method = "log"))
head(asset_returns_wk_xts)
##                     SPY          QQQ         EEM         IWM          EFA
## 2010-01-15 -0.008150437 -0.015151840 -0.02936192 -0.01310491 -0.003499592
## 2010-01-22 -0.039762950 -0.037555834 -0.05739637 -0.03110021 -0.057354398
## 2010-01-29 -0.016805842 -0.031514535 -0.03415450 -0.02659400 -0.026141596
## 2010-02-05 -0.006820598  0.004429936 -0.02861882 -0.01407284 -0.019238906
## 2010-02-12  0.012855419  0.017985548  0.03279006  0.02909850  0.005231077
## 2010-02-19  0.028288820  0.024157339  0.02415932  0.03288469  0.022734518
##                      TLT          IYR          GLD
## 2010-01-15  1.984876e-02 -0.006324216 -0.004589867
## 2010-01-22  1.005069e-02 -0.042682960 -0.033851808
## 2010-01-29  3.364159e-03 -0.008483588 -0.011354686
## 2010-02-05 -5.498693e-05  0.003218025 -0.012153575
## 2010-02-12 -1.965273e-02 -0.007602470  0.022294524
## 2010-02-19 -8.237800e-03  0.048966210  0.022447945
#Calculate monthly return
prices_monthly <- to.monthly(portfolioPrices, indexAt = "last", OHLC = FALSE)
asset_returns_mon_xts <- na.omit(Return.calculate(prices_monthly, method = "log"))
head(asset_returns_mon_xts)
##                    SPY         QQQ          EEM         IWM          EFA
## 2010-02-26  0.03071829  0.04501037  0.017608033  0.04377892  0.002663902
## 2010-03-31  0.05909865  0.07428073  0.077986808  0.07909471  0.061898370
## 2010-04-30  0.01535160  0.02217763 -0.001663229  0.05523140 -0.028446401
## 2010-05-28 -0.08278948 -0.07679846 -0.098644878 -0.07835829 -0.118702509
## 2010-06-30 -0.05312742 -0.06161683 -0.014085706 -0.08059614 -0.020834745
## 2010-07-30  0.06606964  0.07006851  0.103752072  0.06514063  0.109843812
##                     TLT         IYR          GLD
## 2010-02-26 -0.003430392  0.05313327  0.032223422
## 2010-03-31 -0.020788818  0.09302129 -0.004396045
## 2010-04-30  0.032679049  0.06192340  0.057168645
## 2010-05-28  0.049822040 -0.05851393  0.030056879
## 2010-06-30  0.056358778 -0.04782714  0.023280093
## 2010-07-30 -0.009508660  0.08988476 -0.052210723
###3
monthly_returns <- tk_tbl(asset_returns_mon_xts, rename_index = "date")
head(monthly_returns)
## # A tibble: 6 × 9
##   date           SPY     QQQ      EEM     IWM      EFA      TLT     IYR      GLD
##   <date>       <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>   <dbl>    <dbl>
## 1 2010-02-26  0.0307  0.0450  0.0176   0.0438  0.00266 -0.00343  0.0531  0.0322 
## 2 2010-03-31  0.0591  0.0743  0.0780   0.0791  0.0619  -0.0208   0.0930 -0.00440
## 3 2010-04-30  0.0154  0.0222 -0.00166  0.0552 -0.0284   0.0327   0.0619  0.0572 
## 4 2010-05-28 -0.0828 -0.0768 -0.0986  -0.0784 -0.119    0.0498  -0.0585  0.0301 
## 5 2010-06-30 -0.0531 -0.0616 -0.0141  -0.0806 -0.0208   0.0564  -0.0478  0.0233 
## 6 2010-07-30  0.0661  0.0701  0.104    0.0651  0.110   -0.00951  0.0899 -0.0522
###4
FF3 <- read_csv("/cloud/project/F-F_Research_Data_Factors 3.csv")
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 1289 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ...1
## dbl (4): Mkt-RF, SMB, HML, RF
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
FF3 <- FF3 %>% select(-RF)
FF3 <- FF3 %>% 
  mutate_at(vars(-1), ~ ./100)
  
FF3 <- FF3 %>% 
  rename(Mkt_RF = `Mkt-RF`) %>% 
  rename(time = ...1)

head(FF3)
## # A tibble: 6 × 4
##   time    Mkt_RF     SMB     HML
##   <chr>    <dbl>   <dbl>   <dbl>
## 1 192607  0.0289 -0.0255 -0.0239
## 2 192608  0.0264 -0.0114  0.0381
## 3 192609  0.0038 -0.0136  0.0005
## 4 192610 -0.0327 -0.0014  0.0082
## 5 192611  0.0254 -0.0011 -0.0061
## 6 192612  0.0262 -0.0007  0.0006
###5
FF3_subset <- FF3 %>%
  filter(time >= 201002)

monthly_returns_subset <- monthly_returns %>%
  filter(date <= as.Date("2024-04-30"))

colnames(FF3_subset) <- c("date", "Mkt_RF","SMB","HML","RF")
FF3_subset$date <- format(as.Date(paste(FF3_subset$date,"01",sep=""),"%Y%m%d"),"%m/%Y")
monthly_returns_subset$date <- format(monthly_returns_subset$date,"%m/%Y")

merged_data <- left_join(monthly_returns_subset, FF3_subset, by = "date")

head(merged_data)
## # A tibble: 6 × 12
##   date        SPY     QQQ      EEM     IWM      EFA      TLT     IYR      GLD
##   <chr>     <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>   <dbl>    <dbl>
## 1 02/2010  0.0307  0.0450  0.0176   0.0438  0.00266 -0.00343  0.0531  0.0322 
## 2 03/2010  0.0591  0.0743  0.0780   0.0791  0.0619  -0.0208   0.0930 -0.00440
## 3 04/2010  0.0154  0.0222 -0.00166  0.0552 -0.0284   0.0327   0.0619  0.0572 
## 4 05/2010 -0.0828 -0.0768 -0.0986  -0.0784 -0.119    0.0498  -0.0585  0.0301 
## 5 06/2010 -0.0531 -0.0616 -0.0141  -0.0806 -0.0208   0.0564  -0.0478  0.0233 
## 6 07/2010  0.0661  0.0701  0.104    0.0651  0.110   -0.00951  0.0899 -0.0522 
## # ℹ 3 more variables: Mkt_RF <dbl>, SMB <dbl>, HML <dbl>
###6
merged_data_60 <- merged_data[1:60, ]
portfolio8_60 <- merged_data_60 %>% 
  select(SPY:GLD)
rf <- merged_data[1:60, 10]
#Find covariance matrix using CAMP
cov_ma_sim <- function(returns, rf){
  n <- nrow(returns)
  X <- as.matrix(cbind(rep(1, n), rf))
  Y <- as.matrix(returns)
  
  b_hat <- solve(t(X) %*% X) %*% t(X) %*% Y
  E_hat = Y - X %*% b_hat
  
  b_hat <- as.matrix(b_hat[-1, ])
  diagD_hat <- diag(t(E_hat) %*% E_hat) / (n - 2)
  diag(diagD_hat)
  
  cov_sfm <- as.numeric(var(rf)) * b_hat %*% t(b_hat) + diag(diagD_hat)
  return(cov_sfm)
}

cov_ma_capm <- cov_ma_sim(portfolio8_60, rf)

head(cov_ma_capm)
##               SPY          QQQ          EEM          IWM          EFA
## SPY  0.0013791391  0.001429824  0.001721310  0.001803727  0.001574553
## QQQ  0.0014298244  0.001767914  0.001798113  0.001884208  0.001644808
## EEM  0.0017213096  0.001798113  0.003411299  0.002268325  0.001980121
## IWM  0.0018037272  0.001884208  0.002268325  0.002667868  0.002074930
## EFA  0.0015745528  0.001644808  0.001980121  0.002074930  0.002435990
## TLT -0.0009758235 -0.001019364 -0.001227173 -0.001285931 -0.001122545
##               TLT           IYR           GLD
## SPY -0.0009758235  0.0011607664  0.0002490404
## QQQ -0.0010193642  0.0012125592  0.0002601524
## EEM -0.0012271727  0.0014597525  0.0003131873
## IWM -0.0012859306  0.0015296466  0.0003281830
## EFA -0.0011225454  0.0013352958  0.0002864854
## TLT  0.0015611222 -0.0008275448 -0.0001775483
###7
N <- nrow(portfolio8_60)
ones <- rep(1, N)
portfolio_ma <- as.matrix(portfolio8_60)
sigF3 <- as.matrix(var(cbind(merged_data_60$Mkt_RF,
                             merged_data_60$SMB,
                             merged_data_60$HML)))
X.3 <-  cbind(ones, merged_data_60$Mkt_RF, merged_data_60$SMB, merged_data_60$HML)
b_hat.3 <-  solve(t(X.3) %*% (X.3)) %*% t(X.3) %*% portfolio_ma
E_hat.3 <-  portfolio_ma - X.3 %*% b_hat.3
b_hat.3 <-  as.matrix(b_hat.3[-1, ])
diagD_hat.3 <-  diag(t(E_hat.3) %*% E_hat.3)/(N-4)
cov_ma_ff3 = t(b_hat.3) %*% sigF3 %*% b_hat.3 + diag(diagD_hat.3)
head(cov_ma_ff3)
##               SPY           QQQ          EEM          IWM          EFA
## SPY  0.0013791371  0.0014329219  0.001719827  0.001762599  0.001594363
## QQQ  0.0014329219  0.0017744556  0.001822619  0.001826812  0.001692918
## EEM  0.0017198275  0.0018226190  0.003455233  0.002260509  0.001992558
## IWM  0.0017625987  0.0018268124  0.002260509  0.002663865  0.001932037
## EFA  0.0015943632  0.0016929184  0.001992558  0.001932037  0.002453887
## TLT -0.0009745603 -0.0009558418 -0.001200759 -0.001334850 -0.001077284
##               TLT           IYR           GLD
## SPY -0.0009745603  0.0011605608  2.128627e-04
## QQQ -0.0009558418  0.0012192457  3.408291e-04
## EEM -0.0012007587  0.0014626689  3.619003e-04
## IWM -0.0013348502  0.0015263983  4.781589e-04
## EFA -0.0010772838  0.0013391964  2.475973e-04
## TLT  0.0015884417 -0.0008203952 -8.150332e-05
###8
#GMV weights function
GMV_sim <- function(cov_ma){
  x <- ncol(cov_ma)
  one_ma <- matrix(rep(1,x), ncol = 1)
  numerator <- inv(cov_ma) %*% one_ma
  denominator <- t(one_ma) %*% inv(cov_ma) %*% one_ma
  weights <- numerator/as.vector(denominator)
  colnames(weights) <- "Weight"
  return(weights)
}
# GMV weights of CAPM model
capm_model<- GMV_sim(cov_ma_capm)
capm_model
##          Weight
## SPY  0.75981612
## QQQ -0.00480398
## EEM -0.03631930
## IWM -0.19834636
## EFA -0.03704882
## TLT  0.41768483
## IYR  0.03820445
## GLD  0.06081305
# GMV weights of Fama-French model
ff3_model <- GMV_sim(cov_ma_ff3)
ff3_model
##          Weight
## SPY  0.86815722
## QQQ -0.12635460
## EEM -0.04234021
## IWM -0.11779684
## EFA -0.10293814
## TLT  0.41770290
## IYR  0.03680813
## GLD  0.06676155
###9
# Function to apply rolling window and calculate weights
rolling_capm <- function(data, window_size) {
  n <- nrow(data)
  results <- list()
  
  for (i in 1:(n - window_size + 1)) {
    window_data <- data[i:(i + window_size - 1), ]
    portfolio_returns <- window_data %>% select(SPY:GLD)
    rf <- window_data[[10]]  # Assuming the risk-free rate is the 10th column
    
    # Calculate covariance matrix
    cov_ma <- cov_ma_sim(portfolio_returns, rf)
    
    # Calculate GMV weights
    weights <- GMV_sim(cov_ma)
    
    # Store results
    results[[i]] <- weights
  }
  
  return(results)
}
# Define the window size
window_size <- 60

# Apply the rolling window function
rolling_weights <- rolling_capm(merged_data, window_size)

# Convert the list of weights to a data frame for better readability
rolling_weights_ma <- do.call(cbind, rolling_weights)

#Calculate portfolio return
portfolio8_backtest <- merged_data[60:171, 2:9]
portfolio8_backtest_ma <- as.matrix(portfolio8_backtest)
rolling_weights_ma <- t(rolling_weights_ma)

port_returns <- rowSums(portfolio8_backtest_ma*rolling_weights_ma)
port_returns <- as_tibble(port_returns)
pct_return <- port_returns %>% mutate(pct = value + 1)

cum_product <- numeric(111)
cum_product[1] <- pct_return$pct[1]
for (i in 2:112) {
  cum_product[i] <- cum_product[i - 1] * pct_return$pct[i]
}

pct_return$cum_return <- cum_product

pct_return <- pct_return %>% 
  mutate(date = seq.Date(from = as.Date("2015-01-01"), by = "month", length.out = 112)) %>% 
  select(date, everything())

ggplot(data = pct_return,
       mapping = aes(x = date, y = cum_return)) +
  geom_line(linewidth = 0.6) +
  labs(
    title = "Backtesting of CAPM model",
    subtitle = "With rolling window of 60 months, tested from 2015 to 2024",
    x = "Year", y ="Cummulative return (%)"
  )

compute_ff3_mvp_weights <- function(portfolio_window, factor_window) {
  N <- nrow(portfolio_window)
  ones <- rep(1, N)
  portfolio_ma <- as.matrix(portfolio_window)
  
  # Covariance matrix of FF5 factors
  sigF3 <- as.matrix(var(cbind(factor_window$Mkt_RF, factor_window$SMB, factor_window$HML)))
  
  # Design matrix
  X.3 <- cbind(ones, factor_window$Mkt_RF, factor_window$SMB, factor_window$HML)
  
  # Beta estimates
  b_hat.3 <- solve(t(X.3) %*% X.3) %*% t(X.3) %*% portfolio_ma
  
  # Residuals
  E_hat.3 <- portfolio_ma - X.3 %*% b_hat.3
  
  # Remove the intercept term
  b_hat.3 <- as.matrix(b_hat.3[-1, ])
  
  # Variance of residuals
  diagD_hat.3 <- diag(t(E_hat.3) %*% E_hat.3) / (N - 4)
  
  # Covariance matrix
  cov_3f.3 <- t(b_hat.3) %*% sigF3 %*% b_hat.3 + diag(diagD_hat.3)
  
  # MVP weights
  x <- ncol(cov_3f.3)
  one_ma <- matrix(rep(1, x), ncol = 1)
  numerator <- solve(cov_3f.3) %*% one_ma
  denominator <- t(one_ma) %*% solve(cov_3f.3) %*% one_ma
  weights <- numerator / as.vector(denominator)
  colnames(weights) <- "Weight"
  
  return(weights)
}

# Apply rolling window to compute weights
window_size <- 60
num_windows <- nrow(monthly_returns_subset) - window_size -1
weights_list <- list()

for (i in 1:num_windows) {
  portfolio_window <- monthly_returns_subset[i:(i + window_size - 1), 2:9]
  factor_window <- FF3_subset[i:(i + window_size - 1), 2:4]
  
  weights <- compute_ff3_mvp_weights(portfolio_window, factor_window)
  weights_list[[i]] <- weights
}

portfolio8_backtest <- merged_data[60:169, 2:9]

# Transform the list of lists into a matrix
weights_ma <- do.call(cbind, weights_list)

portfolio8_backtest_ma <- as.matrix(portfolio8_backtest)
weights_ma <- t(weights_ma) 

port_returns <- rowSums(portfolio8_backtest_ma*weights_ma)
port_returns <- as_tibble(port_returns)
pct_return <- port_returns %>% mutate(pct = value + 1)

cum_product <- numeric(110)
cum_product[1] <- pct_return$pct[1]
for (i in 2:110) {
  cum_product[i] <- cum_product[i - 1] * pct_return$pct[i]
}

pct_return$cum_return <- cum_product

pct_return <- pct_return %>% 
  mutate(date = seq.Date(from = as.Date("2015-01-01"), by = "month", length.out = 110)) %>% 
  select(date, everything())

ggplot(data = pct_return,
       mapping = aes(x = date, y = cum_return)) +
  geom_line(linewidth = 0.6) +
  labs(
    title = "Backtesting of Fama-French 3 factors model",
    subtitle = "With rolling window of 60 months, tested from 2015 to 2024",
    x = "Year", y ="Cummulative return (%)"
  )