library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.27     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(timetk)
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG
library(purrr)
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:xts':
## 
##     first, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(quantmod)
library(xts)
library(PerformanceAnalytics)
library(tidyr)

# 1. Import ETF data from Yahoo Finance
# Define ETF tickers
etf_tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")

# Download data from 2010 to current date
start_date <- "2010-01-01"
end_date <- Sys.Date()

# Download adjusted closing prices
etf_prices <- tq_get(etf_tickers, 
                     get = "stock.prices",
                     from = start_date,
                     to = end_date) %>%
  select(symbol, date, adjusted) %>%
  pivot_wider(names_from = symbol, values_from = adjusted) %>%
  arrange(date)

# Convert to xts format for easier manipulation
etf_prices_xts <- etf_prices %>%
  tk_xts(date_var = date)
## Warning: Non-numeric columns being dropped: date
# Display first few rows
head(etf_prices_xts)
##                 SPY      QQQ      EEM      IWM      EFA      TLT      IYR
## 2010-01-04 85.76846 40.48581 31.08410 51.92011 36.38438 58.25045 27.40290
## 2010-01-05 85.99547 40.48581 31.30972 51.74159 36.41644 58.62661 27.46870
## 2010-01-06 86.05605 40.24160 31.37522 51.69288 36.57037 57.84180 27.45673
## 2010-01-07 86.41929 40.26778 31.19326 52.07430 36.42927 57.93907 27.70199
## 2010-01-08 86.70689 40.59919 31.44072 52.35832 36.71787 57.91311 27.51655
## 2010-01-11 86.82799 40.43348 31.37522 52.14733 37.01933 57.59531 27.64814
##               GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
# 2. Calculate weekly and monthly returns using simple returns
# Calculate daily returns first
etf_daily_returns <- etf_prices_xts %>%
  Return.calculate(method = "simple") %>%
  na.omit()

# Calculate weekly returns
etf_weekly_returns <- etf_daily_returns %>%
  apply.weekly(Return.cumulative)

# Calculate monthly returns
etf_monthly_returns <- etf_daily_returns %>%
  apply.monthly(Return.cumulative)

# Display first few rows of monthly returns
head(etf_monthly_returns)
##                    SPY         QQQ          EEM         IWM          EFA
## 2010-01-29 -0.05241329 -0.07819939 -0.103723018 -0.06048744 -0.074916549
## 2010-02-26  0.03119410  0.04603894  0.017763904  0.04475128  0.002667567
## 2010-03-31  0.06088018  0.07710947  0.081108957  0.08230697  0.063854332
## 2010-04-30  0.01546961  0.02242529 -0.001661721  0.05678456 -0.028045821
## 2010-05-28 -0.07945436 -0.07392372 -0.093935987 -0.07536629 -0.111928003
## 2010-06-30 -0.05174118 -0.05975684 -0.013986486 -0.07743419 -0.020619572
##                     TLT         IYR          GLD
## 2010-01-29  0.027835942 -0.05195402 -0.034972713
## 2010-02-26 -0.003424772  0.05457085  0.032748219
## 2010-03-31 -0.020573022  0.09748456 -0.004386396
## 2010-04-30  0.033217229  0.06388072  0.058834363
## 2010-05-28  0.051084090 -0.05683515  0.030513147
## 2010-06-30  0.057978643 -0.04670127  0.023553189
# 3. Convert monthly returns into tibble format
etf_monthly_tibble <- etf_monthly_returns %>%
  tk_tbl(rename_index = "date") %>%
  mutate(date = as.Date(date))

head(etf_monthly_tibble)
## # A tibble: 6 × 9
##   date           SPY     QQQ      EEM     IWM      EFA      TLT     IYR      GLD
##   <date>       <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>   <dbl>    <dbl>
## 1 2010-01-29 -0.0524 -0.0782 -0.104   -0.0605 -0.0749   0.0278  -0.0520 -0.0350 
## 2 2010-02-26  0.0312  0.0460  0.0178   0.0448  0.00267 -0.00342  0.0546  0.0327 
## 3 2010-03-31  0.0609  0.0771  0.0811   0.0823  0.0639  -0.0206   0.0975 -0.00439
## 4 2010-04-30  0.0155  0.0224 -0.00166  0.0568 -0.0280   0.0332   0.0639  0.0588 
## 5 2010-05-28 -0.0795 -0.0739 -0.0939  -0.0754 -0.112    0.0511  -0.0568  0.0305 
## 6 2010-06-30 -0.0517 -0.0598 -0.0140  -0.0774 -0.0206   0.0580  -0.0467  0.0236
# 4. Download Fama French 3 factors data
# Note: This requires manual download from Ken French's website
# For demonstration, I'll show how to process the data once downloaded

# Function to process Fama-French data (assuming CSV format)
process_ff_data <- function(file_path) {
  # Read the CSV file (skip header rows as needed)
  ff_data <- read.csv(file_path, skip = 3, stringsAsFactors = FALSE)
  
  # Clean column names
  colnames(ff_data) <- c("date", "Mkt.RF", "SMB", "HML", "RF")
  
  # Convert date format (assuming YYYYMM format)
  ff_data$date <- as.Date(paste0(ff_data$date, "01"), format = "%Y%m%d")
  
  # Convert percentages to decimals
  ff_data[, 2:5] <- ff_data[, 2:5] / 100
  
  return(ff_data)
}

# For demonstration, create sample FF data
# In practice, download from: http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
set.seed(123)
sample_dates <- seq.Date(from = as.Date("2010-01-01"), 
                        to = as.Date("2024-12-01"), 
                        by = "month")

ff_factors <- tibble(
  date = sample_dates,
  Mkt.RF = rnorm(length(sample_dates), 0.008, 0.04),
  SMB = rnorm(length(sample_dates), 0.002, 0.02),
  HML = rnorm(length(sample_dates), 0.003, 0.025),
  RF = rep(0.001, length(sample_dates))  # Risk-free rate
)

# 5. Merge monthly return data and FF factors
merged_data <- etf_monthly_tibble %>%
  left_join(ff_factors, by = "date") %>%
  na.omit()

head(merged_data)
## # A tibble: 0 × 13
## # ℹ 13 variables: date <date>, SPY <dbl>, QQQ <dbl>, EEM <dbl>, IWM <dbl>,
## #   EFA <dbl>, TLT <dbl>, IYR <dbl>, GLD <dbl>, Mkt.RF <dbl>, SMB <dbl>,
## #   HML <dbl>, RF <dbl>
# 6. CAPM model covariance matrix (2010/02 - 2015/01)
# Filter data for the specified period
analysis_period <- merged_data %>%
  filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-31"))

# Calculate excess returns for CAPM
excess_returns_capm <- analysis_period %>%
  select(all_of(etf_tickers)) %>%
  sweep(1, analysis_period$RF, "-")  # Subtract risk-free rate
# --- 6. CAPM Covariance Matrix (2010/02 - 2015/01, past 60 months) ---

# Function to calculate CAPM residual covariance matrix
calculate_capm_cov <- function(asset_returns, factors) {
  # Get excess returns
  excess_returns <- asset_returns - factors$RF

  # Run regressions of each ETF on Mkt.RF to get residuals
  residuals_matrix <- sapply(excess_returns, function(y) {
    model <- lm(y ~ factors$Mkt.RF)
    resid(model)
  })

  # Compute covariance matrix of residuals
  cov_matrix <- cov(residuals_matrix)
  return(cov_matrix)
}

calculate_capm_cov <- function(data) {
  # Remove rows with NA values
  data <- na.omit(data)

  # Separate ETF returns and factors
  asset_returns <- data %>% select(all_of(etf_tickers))
  factors <- data %>% select(Mkt.RF, RF)

  # Get excess returns
  excess_returns <- sweep(asset_returns, 1, factors$RF, "-")

  # Run regressions of each ETF on Mkt.RF to get residuals
  residuals_matrix <- sapply(excess_returns, function(y) {
    model <- lm(y ~ factors$Mkt.RF)
    resid(model)
  })

  # Compute covariance matrix of residuals
  cov_matrix <- cov(residuals_matrix)
  return(cov_matrix)
}



# --- 7. Fama-French 3-Factor Covariance Matrix (2010/02 - 2015/01) ---

# Function to calculate FF3 residual covariance matrix
calculate_ff3_cov <- function(asset_returns, factors) {
  # Get excess returns
  excess_returns <- asset_returns - factors$RF

  # Run regressions of each ETF on Mkt.RF + SMB + HML to get residuals
  residuals_matrix <- sapply(excess_returns, function(y) {
    model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
    resid(model)
  })

  # Compute covariance matrix of residuals
  cov_matrix <- cov(residuals_matrix)
  return(cov_matrix)
}

calculate_ff3_cov <- function(data) {
  # Remove rows with NA values
  data <- na.omit(data)

  # Separate ETF returns and factors
  asset_returns <- data %>% select(all_of(etf_tickers))
  factors <- data %>% select(Mkt.RF, SMB, HML, RF)

  # Get excess returns
  excess_returns <- sweep(asset_returns, 1, factors$RF, "-")

  # Run regressions of each ETF on Mkt.RF, SMB, HML to get residuals
  residuals_matrix <- sapply(excess_returns, function(y) {
    model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
    resid(model)
  })

  # Compute covariance matrix of residuals
  cov_matrix <- cov(residuals_matrix)
  return(cov_matrix)
}



# --- 8. Compute GMV Portfolio Weights ---

# Function to compute GMV weights given a covariance matrix
gmv_weights <- function(cov_matrix) {
  inv_cov <- solve(cov_matrix)
  ones <- rep(1, ncol(cov_matrix))
  weights <- inv_cov %*% ones / as.numeric(t(ones) %*% inv_cov %*% ones)
  return(as.vector(weights))
}

calculate_ff3_cov <- function(data) {
  data <- na.omit(data)
  asset_returns <- data %>% select(all_of(etf_tickers))
  factors <- data %>% select(Mkt.RF, SMB, HML, RF)
  
  excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
  
  residuals_matrix <- sapply(excess_returns, function(y) {
    lm_model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
    resid(lm_model)
  })
  
  cov(residuals_matrix)
}
calculate_ff3_cov <- function(data) {
  # Remove rows with any NA values in the relevant columns
  data_clean <- na.omit(data)
  
  asset_returns <- data_clean %>% select(all_of(etf_tickers))
  factors <- data_clean %>% select(Mkt.RF, SMB, HML, RF)
  
  # Compute excess returns
  excess_returns <- sweep(asset_returns, 1, factors$RF, "-")
  
  residuals_matrix <- sapply(excess_returns, function(y) {
    # For each ETF, regress on FF factors
    model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
    resid(model)
  })
  
  cov(residuals_matrix)
}


calculate_ff3_cov <- function(data) {
  data_clean <- na.omit(data)
  asset_returns <- data_clean %>% select(all_of(etf_tickers))
  factors <- data_clean %>% select(Mkt.RF, SMB, HML, RF)

  excess_returns <- sweep(asset_returns, 1, factors$RF, "-")

  residuals_matrix <- sapply(excess_returns, function(y) {
    model <- lm(y ~ factors$Mkt.RF + factors$SMB + factors$HML)
    resid(model)
  })

  cov(residuals_matrix)
}



print("GMV Weights - CAPM Model:")
## [1] "GMV Weights - CAPM Model:"
print("GMV Weights - FF3 Model:")
## [1] "GMV Weights - FF3 Model:"
# --- 9. Backtesting with Rolling Window ---

# Function to backtest GMV portfolios with rolling window estimation
backtest_gmv_portfolio <- function(returns_data, factor_data, model = "CAPM", window_size = 60) {
  dates <- returns_data$date
  n <- nrow(returns_data)
  monthly_returns <- matrix(NA, n - window_size, 2)  # Columns: CAPM, FF3

  for (i in (window_size + 1):n) {
    window_data <- returns_data[(i - window_size):(i - 1), ]
    window_factors <- factor_data[(i - window_size):(i - 1), ]

    if (model == "CAPM") {
      cov_mat <- calculate_capm_cov(window_data %>% select(all_of(etf_tickers)),
                                    window_factors %>% select(Mkt.RF, RF))
    } else if (model == "FF3") {
      cov_mat <- calculate_ff3_cov(window_data %>% select(all_of(etf_tickers)),
                                   window_factors %>% select(Mkt.RF, SMB, HML, RF))
    }

    weights <- gmv_weights(cov_mat)
    next_return <- returns_data[i, etf_tickers] %>% unlist()
    monthly_returns[i - window_size, 1] <- sum(weights * next_return)
  }

  return(monthly_returns)
}

# Run backtests
returns_data <- merged_data
factor_data <- merged_data %>% select(date, Mkt.RF, SMB, HML, RF)

n_months <- nrow(returns_data)


# Create time series of portfolio returns
dates_backtest <- returns_data$date[61:n_months]