library(tidyquant)
library(lubridate)
library(timetk)
library(purrr)
library(dplyr)
library(readr)
library(fBasics)
library(ggplot2)
library(zoo)

1. Import data

symbols <- c("SPY","QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD") 
portfolioPrices <- NULL
for (symbol in symbols) {
  portfolioPrices <- cbind(portfolioPrices, 
                           getSymbols.yahoo(symbol,
                                            from = '2010-01-01',
                                            to = Sys.Date(),
                                            auto.assign = FALSE)[,6])
}

colnames(portfolioPrices) <- symbols

head(portfolioPrices)
##                 SPY      QQQ      EEM      IWM      EFA      TLT      IYR
## 2010-01-04 86.86005 40.73327 31.82711 52.51540 37.52379 60.71098 28.10299
## 2010-01-05 87.09000 40.73327 32.05813 52.33484 37.55686 61.10311 28.17046
## 2010-01-06 87.15131 40.48758 32.12520 52.28558 37.71560 60.28510 28.15820
## 2010-01-07 87.51918 40.51391 31.93888 52.67136 37.57008 60.38655 28.40972
## 2010-01-08 87.81045 40.84734 32.19226 52.95863 37.86774 60.35949 28.21954
## 2010-01-11 87.93307 40.68063 32.12520 52.74522 38.17862 60.02823 28.35450
##               GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85

2. Weekly returns and monthly returns (log returns)

#Calculate weekly return
prices_weekly <- to.weekly(portfolioPrices, indexAt = "last", OHLC = FALSE)
asset_returns_wk_xts <- na.omit(Return.calculate(prices_weekly, method = "log"))
head(asset_returns_wk_xts)
##                     SPY          QQQ         EEM         IWM          EFA
## 2010-01-15 -0.008150250 -0.015151630 -0.02936191 -0.01310434 -0.003499589
## 2010-01-22 -0.039763131 -0.037556152 -0.05739696 -0.03110064 -0.057354495
## 2010-01-29 -0.016805952 -0.031514694 -0.03415418 -0.02659375 -0.026141322
## 2010-02-05 -0.006820356  0.004430495 -0.02861872 -0.01407293 -0.019238661
## 2010-02-12  0.012855207  0.017985143  0.03278999  0.02909832  0.005230675
## 2010-02-19  0.028289114  0.024157705  0.02415948  0.03288479  0.022734799
##                      TLT          IYR          GLD
## 2010-01-15  1.984851e-02 -0.006324517 -0.004589867
## 2010-01-22  1.005039e-02 -0.042682860 -0.033851808
## 2010-01-29  3.363572e-03 -0.008483327 -0.011354686
## 2010-02-05 -5.422558e-05  0.003218053 -0.012153575
## 2010-02-12 -1.965196e-02 -0.007603047  0.022294524
## 2010-02-19 -8.239999e-03  0.048966926  0.022447945
#Calculate monthly return
prices_monthly <- to.monthly(portfolioPrices, indexAt = "last", OHLC = FALSE)
asset_returns_mon_xts <- na.omit(Return.calculate(prices_monthly, method = "log"))
head(asset_returns_mon_xts)
##                    SPY         QQQ          EEM         IWM          EFA
## 2010-02-26  0.03071861  0.04501048  0.017608214  0.04377876  0.002664115
## 2010-03-31  0.05909816  0.07428100  0.077987045  0.07909470  0.061898157
## 2010-04-30  0.01535156  0.02217729 -0.001663201  0.05523095 -0.028446374
## 2010-05-28 -0.08278885 -0.07679866 -0.098645271 -0.07835757 -0.118702771
## 2010-06-30 -0.05312732 -0.06161666 -0.014085190 -0.08059676 -0.020834778
## 2010-07-30  0.06606918  0.07006952  0.103751533  0.06514079  0.109844154
##                     TLT         IYR          GLD
## 2010-02-26 -0.003429696  0.05313352  0.032223422
## 2010-03-31 -0.020788597  0.09302052 -0.004396045
## 2010-04-30  0.032678110  0.06192398  0.057168645
## 2010-05-28  0.049822577 -0.05851436  0.030056879
## 2010-06-30  0.056359423 -0.04782691  0.023280093
## 2010-07-30 -0.009508860  0.08988449 -0.052210723

3. Monthly returns to df_tbl format

monthly_returns <- tk_tbl(asset_returns_mon_xts, rename_index = "date")
head(monthly_returns)
## # A tibble: 6 × 9
##   date           SPY     QQQ      EEM     IWM      EFA      TLT     IYR      GLD
##   <date>       <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>   <dbl>    <dbl>
## 1 2010-02-26  0.0307  0.0450  0.0176   0.0438  0.00266 -0.00343  0.0531  0.0322 
## 2 2010-03-31  0.0591  0.0743  0.0780   0.0791  0.0619  -0.0208   0.0930 -0.00440
## 3 2010-04-30  0.0154  0.0222 -0.00166  0.0552 -0.0284   0.0327   0.0619  0.0572 
## 4 2010-05-28 -0.0828 -0.0768 -0.0986  -0.0784 -0.119    0.0498  -0.0585  0.0301 
## 5 2010-06-30 -0.0531 -0.0616 -0.0141  -0.0806 -0.0208   0.0564  -0.0478  0.0233 
## 6 2010-07-30  0.0661  0.0701  0.104    0.0651  0.110   -0.00951  0.0899 -0.0522

4. Change Fama French 3 factors data to digit numbers

FF3 <- read_csv("C:/Users/Admin/OneDrive - 亞洲大學[Asia University]/3rd Year/2nd Sem/1. Investment Portfolio Analysis/F-F_Research_Data_Factors.CSV")
FF3 <- FF3 %>% select(-RF)
FF3 <- FF3 %>% 
  mutate_at(vars(-1), ~ ./100)
  
FF3 <- FF3 %>% 
  rename(Mkt_RF = `Mkt-RF`) %>% 
  rename(time = ...1)

head(FF3)
## # A tibble: 6 × 4
##     time  Mkt_RF     SMB     HML
##    <dbl>   <dbl>   <dbl>   <dbl>
## 1 192607  0.0296 -0.0256 -0.0243
## 2 192608  0.0264 -0.0117  0.0382
## 3 192609  0.0036 -0.014   0.0013
## 4 192610 -0.0324 -0.0009  0.007 
## 5 192611  0.0253 -0.001  -0.0051
## 6 192612  0.0262 -0.0003 -0.0005

5. Combine monthly return data of question 3 and question 4 into a df_tbl object

FF3_subset <- FF3 %>%
  filter(time >= 201002)

monthly_returns_subset <- monthly_returns %>%
  filter(date <= as.Date("2024-04-30"))

colnames(FF3_subset) <- c("date", "Mkt_RF","SMB","HML","RF")
FF3_subset$date <- format(as.Date(paste(FF3_subset$date,"01",sep=""),"%Y%m%d"),"%m/%Y")
monthly_returns_subset$date <- format(monthly_returns_subset$date,"%m/%Y")

merged_data <- left_join(monthly_returns_subset, FF3_subset, by = "date")

head(merged_data)
## # A tibble: 6 × 12
##   date        SPY     QQQ      EEM     IWM      EFA      TLT     IYR      GLD
##   <chr>     <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>   <dbl>    <dbl>
## 1 02/2010  0.0307  0.0450  0.0176   0.0438  0.00266 -0.00343  0.0531  0.0322 
## 2 03/2010  0.0591  0.0743  0.0780   0.0791  0.0619  -0.0208   0.0930 -0.00440
## 3 04/2010  0.0154  0.0222 -0.00166  0.0552 -0.0284   0.0327   0.0619  0.0572 
## 4 05/2010 -0.0828 -0.0768 -0.0986  -0.0784 -0.119    0.0498  -0.0585  0.0301 
## 5 06/2010 -0.0531 -0.0616 -0.0141  -0.0806 -0.0208   0.0564  -0.0478  0.0233 
## 6 07/2010  0.0661  0.0701  0.104    0.0651  0.110   -0.00951  0.0899 -0.0522 
## # ℹ 3 more variables: Mkt_RF <dbl>, SMB <dbl>, HML <dbl>

6. Compute covariance matrix for portfolio of 8 assets by using past 5 years returns (60 months) from 2/2010 to 1/2015 based on CAMP model

merged_data_60 <- merged_data[1:60, ]
portfolio8_60 <- merged_data_60 %>% 
  select(SPY:GLD)
rf <- merged_data[1:60, 10]
#Find covariance matrix using CAMP
cov_ma_sim <- function(returns, rf){
  n <- nrow(returns)
  X <- as.matrix(cbind(rep(1, n), rf))
  Y <- as.matrix(returns)
  
  b_hat <- solve(t(X) %*% X) %*% t(X) %*% Y
  E_hat = Y - X %*% b_hat
  
  b_hat <- as.matrix(b_hat[-1, ])
  diagD_hat <- diag(t(E_hat) %*% E_hat) / (n - 2)
  diag(diagD_hat)
  
  cov_sfm <- as.numeric(var(rf)) * b_hat %*% t(b_hat) + diag(diagD_hat)
  return(cov_sfm)
}

cov_ma_capm <- cov_ma_sim(portfolio8_60, rf)

head(cov_ma_capm)
##               SPY          QQQ          EEM          IWM          EFA
## SPY  0.0013791374  0.001429729  0.001721699  0.001804118  0.001574290
## QQQ  0.0014297287  0.001767918  0.001798576  0.001884674  0.001644584
## EEM  0.0017216992  0.001798576  0.003411281  0.002269551  0.001980431
## IWM  0.0018041175  0.001884674  0.002269551  0.002667849  0.002075235
## EFA  0.0015742897  0.001644584  0.001980431  0.002075235  0.002435999
## TLT -0.0009760794 -0.001019663 -0.001227892 -0.001286672 -0.001122762
##               TLT           IYR           GLD
## SPY -0.0009760794  0.0011608495  0.0002491462
## QQQ -0.0010196628  0.0012126832  0.0002602710
## EEM -0.0012278922  0.0014603300  0.0003134219
## IWM -0.0012866718  0.0015302364  0.0003284255
## EFA -0.0011227618  0.0013352985  0.0002865872
## TLT  0.0015611173 -0.0008279019 -0.0001776876

7. Compute covariance matrix for portfolio of 8 assets by using past 5 years returns (60 months) from 2/2010 to 1/2015 based on Fama-French 3-factor model

N <- nrow(portfolio8_60)
ones <- rep(1, N)
portfolio_ma <- as.matrix(portfolio8_60)
sigF3 <- as.matrix(var(cbind(merged_data_60$Mkt_RF,
                             merged_data_60$SMB,
                             merged_data_60$HML)))
X.3 <-  cbind(ones, merged_data_60$Mkt_RF, merged_data_60$SMB, merged_data_60$HML)
b_hat.3 <-  solve(t(X.3) %*% (X.3)) %*% t(X.3) %*% portfolio_ma
E_hat.3 <-  portfolio_ma - X.3 %*% b_hat.3
b_hat.3 <-  as.matrix(b_hat.3[-1, ])
diagD_hat.3 <-  diag(t(E_hat.3) %*% E_hat.3)/(N-4)
cov_ma_ff3 = t(b_hat.3) %*% sigF3 %*% b_hat.3 + diag(diagD_hat.3)
head(cov_ma_ff3)
##               SPY           QQQ          EEM          IWM          EFA
## SPY  0.0013791345  0.0014334454  0.001719507  0.001762644  0.001594371
## QQQ  0.0014334454  0.0017743517  0.001822146  0.001823958  0.001694418
## EEM  0.0017195067  0.0018221460  0.003455185  0.002266608  0.001990158
## IWM  0.0017626444  0.0018239581  0.002266608  0.002663825  0.001932300
## EFA  0.0015943709  0.0016944181  0.001990158  0.001932300  0.002453911
## TLT -0.0009748997 -0.0009535709 -0.001201660 -0.001335452 -0.001077157
##               TLT          IYR           GLD
## SPY -0.0009748997  0.001160299  2.124319e-04
## QQQ -0.0009535709  0.001220325  3.382469e-04
## EEM -0.0012016602  0.001463723  3.643305e-04
## IWM -0.0013354524  0.001528396  4.816482e-04
## EFA -0.0010771574  0.001338853  2.450634e-04
## TLT  0.0015882260 -0.000819461 -7.915754e-05

8. Calculate GMV weights based on question 6 and question 7

#GMV weights function
GMV_sim <- function(cov_ma){
  x <- ncol(cov_ma)
  one_ma <- matrix(rep(1,x), ncol = 1)
  numerator <- inv(cov_ma) %*% one_ma
  denominator <- t(one_ma) %*% inv(cov_ma) %*% one_ma
  weights <- numerator/as.vector(denominator)
  colnames(weights) <- "Weight"
  return(weights)
}
# GMV weights of CAPM model
capm_model<- GMV_sim(cov_ma_capm)
capm_model
##           Weight
## SPY  0.759781411
## QQQ -0.004424839
## EEM -0.036293486
## IWM -0.198916141
## EFA -0.036780448
## TLT  0.417610381
## IYR  0.038247253
## GLD  0.060775870
# GMV weights of Fama-French model
ff3_model <- GMV_sim(cov_ma_ff3)
ff3_model
##          Weight
## SPY  0.87449342
## QQQ -0.13248631
## EEM -0.04159750
## IWM -0.11873898
## EFA -0.10266826
## TLT  0.41771868
## IYR  0.03664871
## GLD  0.06663024

9. Backtest the porfolio performance of GMV based on CAPM and Fama-French 3-factor model

CAPM model

# Function to apply rolling window and calculate weights
rolling_capm <- function(data, window_size) {
  n <- nrow(data)
  results <- list()
  
  for (i in 1:(n - window_size + 1)) {
    window_data <- data[i:(i + window_size - 1), ]
    portfolio_returns <- window_data %>% select(SPY:GLD)
    rf <- window_data[[10]]  # Assuming the risk-free rate is the 10th column
    
    # Calculate covariance matrix
    cov_ma <- cov_ma_sim(portfolio_returns, rf)
    
    # Calculate GMV weights
    weights <- GMV_sim(cov_ma)
    
    # Store results
    results[[i]] <- weights
  }
  
  return(results)
}
# Define the window size
window_size <- 60

# Apply the rolling window function
rolling_weights <- rolling_capm(merged_data, window_size)

# Convert the list of weights to a data frame for better readability
rolling_weights_ma <- do.call(cbind, rolling_weights)

#Calculate portfolio return
portfolio8_backtest <- merged_data[60:171, 2:9]
portfolio8_backtest_ma <- as.matrix(portfolio8_backtest)
rolling_weights_ma <- t(rolling_weights_ma)

port_returns <- rowSums(portfolio8_backtest_ma*rolling_weights_ma)
port_returns <- as_tibble(port_returns)
pct_return <- port_returns %>% mutate(pct = value + 1)

cum_product <- numeric(111)
cum_product[1] <- pct_return$pct[1]
for (i in 2:112) {
  cum_product[i] <- cum_product[i - 1] * pct_return$pct[i]
}

pct_return$cum_return <- cum_product

pct_return <- pct_return %>% 
  mutate(date = seq.Date(from = as.Date("2015-01-01"), by = "month", length.out = 112)) %>% 
  select(date, everything())

ggplot(data = pct_return,
       mapping = aes(x = date, y = cum_return)) +
  geom_line(linewidth = 0.6) +
  labs(
    title = "Backtesting of CAPM model",
    subtitle = "With rolling window of 60 months, tested from 2015 to 2024",
    x = "Year", y ="Cummulative return (%)"
  )

Fana-French 3-factor model

compute_ff3_mvp_weights <- function(portfolio_window, factor_window) {
  N <- nrow(portfolio_window)
  ones <- rep(1, N)
  portfolio_ma <- as.matrix(portfolio_window)
  
  # Covariance matrix of FF5 factors
  sigF3 <- as.matrix(var(cbind(factor_window$Mkt_RF, factor_window$SMB, factor_window$HML)))
  
  # Design matrix
  X.3 <- cbind(ones, factor_window$Mkt_RF, factor_window$SMB, factor_window$HML)
  
  # Beta estimates
  b_hat.3 <- solve(t(X.3) %*% X.3) %*% t(X.3) %*% portfolio_ma
  
  # Residuals
  E_hat.3 <- portfolio_ma - X.3 %*% b_hat.3
  
  # Remove the intercept term
  b_hat.3 <- as.matrix(b_hat.3[-1, ])
  
  # Variance of residuals
  diagD_hat.3 <- diag(t(E_hat.3) %*% E_hat.3) / (N - 4)
  
  # Covariance matrix
  cov_3f.3 <- t(b_hat.3) %*% sigF3 %*% b_hat.3 + diag(diagD_hat.3)
  
  # MVP weights
  x <- ncol(cov_3f.3)
  one_ma <- matrix(rep(1, x), ncol = 1)
  numerator <- solve(cov_3f.3) %*% one_ma
  denominator <- t(one_ma) %*% solve(cov_3f.3) %*% one_ma
  weights <- numerator / as.vector(denominator)
  colnames(weights) <- "Weight"
  
  return(weights)
}

# Apply rolling window to compute weights
window_size <- 60
num_windows <- nrow(monthly_returns_subset) - window_size -1
weights_list <- list()

for (i in 1:num_windows) {
  portfolio_window <- monthly_returns_subset[i:(i + window_size - 1), 2:9]
  factor_window <- FF3_subset[i:(i + window_size - 1), 2:4]
  
  weights <- compute_ff3_mvp_weights(portfolio_window, factor_window)
  weights_list[[i]] <- weights
}

portfolio8_backtest <- merged_data[60:169, 2:9]

# Transform the list of lists into a matrix
weights_ma <- do.call(cbind, weights_list)

portfolio8_backtest_ma <- as.matrix(portfolio8_backtest)
weights_ma <- t(weights_ma) 

port_returns <- rowSums(portfolio8_backtest_ma*weights_ma)
port_returns <- as_tibble(port_returns)
pct_return <- port_returns %>% mutate(pct = value + 1)

cum_product <- numeric(110)
cum_product[1] <- pct_return$pct[1]
for (i in 2:110) {
  cum_product[i] <- cum_product[i - 1] * pct_return$pct[i]
}

pct_return$cum_return <- cum_product

pct_return <- pct_return %>% 
  mutate(date = seq.Date(from = as.Date("2015-01-01"), by = "month", length.out = 110)) %>% 
  select(date, everything())

ggplot(data = pct_return,
       mapping = aes(x = date, y = cum_return)) +
  geom_line(linewidth = 0.6) +
  labs(
    title = "Backtesting of Fama-French 3 factors model",
    subtitle = "With rolling window of 60 months, tested from 2015 to 2024",
    x = "Year", y ="Cummulative return (%)"
  )