# Load required libraries
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8      ✔ TTR                  0.24.4
## ✔ quantmod             0.4.27     ✔ xts                  0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date()                 masks base::as.Date()
## ✖ zoo::as.Date.numeric()         masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary()            masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(quantmod)
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(timetk)
## 
## Attaching package: 'timetk'
## 
## The following object is masked from 'package:tidyquant':
## 
##     FANG
library(purrr)
library(dplyr)
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:xts':
## 
##     first, last
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)
library(xts)
library(PerformanceAnalytics)
library(tidyr)
# ====== Q1. Import Data ======
library(tidyquant)
library(quantmod)
library(lubridate)
library(timetk)
library(purrr)
library(dplyr)
library(tibble)
library(xts)

# ETF tickers
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")

# Download adjusted daily prices from Yahoo
etf_prices <- tq_get(tickers,
                     get = "stock.prices",
                     from = "2010-01-01",
                     to = Sys.Date())

# Show first few rows of raw prices
cat(" Downloaded ETF Prices:\n")
##  Downloaded ETF Prices:
print(head(etf_prices))
## # A tibble: 6 × 8
##   symbol date        open  high   low close    volume adjusted
##   <chr>  <date>     <dbl> <dbl> <dbl> <dbl>     <dbl>    <dbl>
## 1 SPY    2010-01-04  112.  113.  112.  113. 118944600     85.8
## 2 SPY    2010-01-05  113.  114.  113.  114. 111579900     86.0
## 3 SPY    2010-01-06  114.  114.  113.  114. 116074400     86.1
## 4 SPY    2010-01-07  114.  114.  113.  114. 131091100     86.4
## 5 SPY    2010-01-08  114.  115.  114.  115. 126402800     86.7
## 6 SPY    2010-01-11  115.  115.  114.  115. 106375700     86.8
# Wide format by symbol
etf_prices_wide <- etf_prices %>%
  select(symbol, date, adjusted) %>%
  pivot_wider(names_from = symbol, values_from = adjusted) %>%
  arrange(date)

# Show wide format
cat("\n Wide Format ETF Prices:\n")
## 
##  Wide Format ETF Prices:
print(head(etf_prices_wide))
## # A tibble: 6 × 9
##   date         SPY   QQQ   EEM   IWM   EFA   TLT   IYR   GLD
##   <date>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-04  85.8  40.5  31.1  51.9  36.4  58.3  27.4  110.
## 2 2010-01-05  86.0  40.5  31.3  51.7  36.4  58.6  27.5  110.
## 3 2010-01-06  86.1  40.2  31.4  51.7  36.6  57.8  27.5  112.
## 4 2010-01-07  86.4  40.3  31.2  52.1  36.4  57.9  27.7  111.
## 5 2010-01-08  86.7  40.6  31.4  52.4  36.7  57.9  27.5  111.
## 6 2010-01-11  86.8  40.4  31.4  52.1  37.0  57.6  27.6  113.
# Convert to xts
etf_xts <- tk_xts(etf_prices_wide, select = -date, date_var = date)

# Show xts object structure
cat("\n xts Object (First Rows):\n")
## 
##  xts Object (First Rows):
print(head(etf_xts))
##                 SPY      QQQ      EEM      IWM      EFA      TLT      IYR
## 2010-01-04 85.76846 40.48580 31.08410 51.92011 36.38437 58.25040 27.40289
## 2010-01-05 85.99545 40.48580 31.30971 51.74158 36.41643 58.62659 27.46869
## 2010-01-06 86.05602 40.24161 31.37521 51.69289 36.57037 57.84179 27.45673
## 2010-01-07 86.41930 40.26777 31.19327 52.07431 36.42926 57.93908 27.70199
## 2010-01-08 86.70692 40.59921 31.44073 52.35833 36.71787 57.91312 27.51654
## 2010-01-11 86.82796 40.43350 31.37521 52.14735 37.01931 57.59531 27.64815
##               GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
# ====== Q2. Calculate Weekly and Monthly Returns ======
# Daily simple returns
daily_returns <- Return.calculate(etf_xts, method = "simple") %>% na.omit()

# Monthly returns: use map to calculate separately per ETF
etf_cols <- colnames(daily_returns)
monthly_returns_list <- map(etf_cols, function(etf) {
  apply.monthly(daily_returns[, etf], function(x) prod(1 + x) - 1) %>%
    `colnames<-`(etf)
})
monthly_returns <- reduce(monthly_returns_list, merge)

# Display the first 6 rows of monthly returns
head(monthly_returns)
##                    SPY         QQQ          EEM         IWM          EFA
## 2010-01-29 -0.05241347 -0.07819894 -0.103723141 -0.06048773 -0.074915944
## 2010-02-26  0.03119468  0.04603914  0.017764112  0.04475152  0.002667566
## 2010-03-31  0.06087980  0.07710934  0.081108615  0.08230682  0.063853972
## 2010-04-30  0.01547013  0.02242502 -0.001661473  0.05678457 -0.028045821
## 2010-05-28 -0.07945484 -0.07392390 -0.093936049 -0.07536643 -0.111928058
## 2010-06-30 -0.05174109 -0.05975666 -0.013986418 -0.07743379 -0.020619265
##                     TLT         IYR          GLD
## 2010-01-29  0.027836884 -0.05195375 -0.034972713
## 2010-02-26 -0.003425345  0.05457062  0.032748219
## 2010-03-31 -0.020572395  0.09748464 -0.004386396
## 2010-04-30  0.033217749  0.06388148  0.058834363
## 2010-05-28  0.051084124 -0.05683612  0.030513147
## 2010-06-30  0.057978007 -0.04670047  0.023553189
# ====== Q3. Convert Monthly Returns to Tibble ======
monthly_returns_tibble <- monthly_returns %>%
  tk_tbl(preserve_index = TRUE, rename_index = "date")

# Show the first 6 rows of the tibble
head(monthly_returns_tibble)
## # A tibble: 6 × 9
##   date           SPY     QQQ      EEM     IWM      EFA      TLT     IYR      GLD
##   <date>       <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>   <dbl>    <dbl>
## 1 2010-01-29 -0.0524 -0.0782 -0.104   -0.0605 -0.0749   0.0278  -0.0520 -0.0350 
## 2 2010-02-26  0.0312  0.0460  0.0178   0.0448  0.00267 -0.00343  0.0546  0.0327 
## 3 2010-03-31  0.0609  0.0771  0.0811   0.0823  0.0639  -0.0206   0.0975 -0.00439
## 4 2010-04-30  0.0155  0.0224 -0.00166  0.0568 -0.0280   0.0332   0.0639  0.0588 
## 5 2010-05-28 -0.0795 -0.0739 -0.0939  -0.0754 -0.112    0.0511  -0.0568  0.0305 
## 6 2010-06-30 -0.0517 -0.0598 -0.0140  -0.0774 -0.0206   0.0580  -0.0467  0.0236
# ====== Q4. Download FF3 Data (Simulated version) ======
# You can replace this with actual Ken French data in production

# Recreate correct date vector based on monthly return dates
ff_dates <- monthly_returns_tibble$date

# Simulate FF3 data with same number of months
ff_data <- tibble(
  date   = ff_dates,
  Mkt_RF = rnorm(n = length(ff_dates), mean = 0.007, sd = 0.04),
  SMB    = rnorm(n = length(ff_dates), mean = 0.002, sd = 0.025),
  HML    = rnorm(n = length(ff_dates), mean = 0.003, sd = 0.03),
  RF     = rnorm(n = length(ff_dates), mean = 0.001, sd = 0.002)
)

# Ensure date is in Date format
ff_data <- ff_data %>% mutate(date = as.Date(date))

# Show the first 6 rows of the FF data
head(ff_data)
## # A tibble: 6 × 5
##   date         Mkt_RF      SMB      HML        RF
##   <date>        <dbl>    <dbl>    <dbl>     <dbl>
## 1 2010-01-29  0.0157   0.00951 -0.0164  -0.00102 
## 2 2010-02-26 -0.0180  -0.0358   0.00277 -0.000792
## 3 2010-03-31 -0.0293  -0.0385  -0.0594   0.00369 
## 4 2010-04-30  0.00260  0.00976 -0.00737  0.00206 
## 5 2010-05-28 -0.0311  -0.0419  -0.00710  0.000723
## 6 2010-06-30 -0.0432  -0.0170   0.0345   0.00197
# ====== Q5. Merge Monthly Returns with FF3 ======
merged_data <- monthly_returns_tibble %>%
  left_join(ff_data, by = "date")

# Show first 6 rows of the merged dataset
head(merged_data)
## # A tibble: 6 × 13
##   date           SPY     QQQ      EEM     IWM      EFA      TLT     IYR      GLD
##   <date>       <dbl>   <dbl>    <dbl>   <dbl>    <dbl>    <dbl>   <dbl>    <dbl>
## 1 2010-01-29 -0.0524 -0.0782 -0.104   -0.0605 -0.0749   0.0278  -0.0520 -0.0350 
## 2 2010-02-26  0.0312  0.0460  0.0178   0.0448  0.00267 -0.00343  0.0546  0.0327 
## 3 2010-03-31  0.0609  0.0771  0.0811   0.0823  0.0639  -0.0206   0.0975 -0.00439
## 4 2010-04-30  0.0155  0.0224 -0.00166  0.0568 -0.0280   0.0332   0.0639  0.0588 
## 5 2010-05-28 -0.0795 -0.0739 -0.0939  -0.0754 -0.112    0.0511  -0.0568  0.0305 
## 6 2010-06-30 -0.0517 -0.0598 -0.0140  -0.0774 -0.0206   0.0580  -0.0467  0.0236 
## # ℹ 4 more variables: Mkt_RF <dbl>, SMB <dbl>, HML <dbl>, RF <dbl>
# ====== Q6. CAPM Covariance Matrix (2010/02 – 2015/01) ======
# Filter for specific 60-month period
capm_data <- merged_data %>%
  filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-31"))

# Calculate excess returns (ETF - RF)
excess_returns_capm <- capm_data %>%
  select(all_of(c(etf_cols, "RF"))) %>%
  mutate(across(all_of(etf_cols), ~ .x - RF)) %>%
  select(-RF)

# Convert to matrix and compute covariance matrix
excess_matrix <- as.matrix(excess_returns_capm)
capm_cov_matrix <- cov(excess_matrix, use = "complete.obs")
rownames(capm_cov_matrix) <- etf_cols
colnames(capm_cov_matrix) <- etf_cols

# Output
cat("CAPM Covariance Matrix (2010/02–2015/01):\n")
## CAPM Covariance Matrix (2010/02–2015/01):
print(round(capm_cov_matrix, 6))
##           SPY       QQQ       EEM       IWM       EFA       TLT       IYR
## SPY  0.001384  0.001458  0.001714  0.001771  0.001587 -0.000999  0.001193
## QQQ  0.001458  0.001809  0.001794  0.001842  0.001669 -0.000992  0.001230
## EEM  0.001714  0.001794  0.003339  0.002306  0.002457 -0.001204  0.001786
## IWM  0.001771  0.001842  0.002306  0.002679  0.001954 -0.001349  0.001600
## EFA  0.001587  0.001669  0.002457  0.001954  0.002401 -0.001128  0.001544
## TLT -0.000999 -0.000992 -0.001204 -0.001349 -0.001128  0.001625 -0.000384
## IYR  0.001193  0.001230  0.001786  0.001600  0.001544 -0.000384  0.002024
## GLD  0.000197  0.000398  0.000878  0.000531  0.000419  0.000203  0.000520
##          GLD
## SPY 0.000197
## QQQ 0.000398
## EEM 0.000878
## IWM 0.000531
## EFA 0.000419
## TLT 0.000203
## IYR 0.000520
## GLD 0.002908
#Q7: FF3-Factor Covariance Matrix (2010/02 – 2015/01)
library(broom)  # for tidy regression output if needed

# Step 1: Filter same 60-month period
ff3_data <- merged_data %>%
  filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-31"))

# Step 2: Create excess returns (R - RF)
excess_returns_ff3 <- ff3_data %>%
  mutate(across(all_of(etf_cols), ~ .x - RF))

# Step 3: Run FF3 regression per ETF and extract residuals
residuals_list <- map(etf_cols, function(etf) {
  formula <- as.formula(paste0(etf, " ~ Mkt_RF + SMB + HML"))
  model <- lm(formula, data = excess_returns_ff3)
  resid(model)
})

# Combine residuals into dataframe
residuals_df <- as.data.frame(residuals_list)
colnames(residuals_df) <- etf_cols

# Step 4: Compute residual covariance matrix
ff3_cov_matrix <- cov(residuals_df, use = "complete.obs")

# Step 5: Show results
cat("F3-Factor Residual Covariance Matrix (2010/02–2015/01):\n")
## F3-Factor Residual Covariance Matrix (2010/02–2015/01):
print(round(ff3_cov_matrix, 6))
##           SPY       QQQ       EEM       IWM       EFA       TLT       IYR
## SPY  0.001270  0.001349  0.001629  0.001612  0.001499 -0.000859  0.001143
## QQQ  0.001349  0.001694  0.001702  0.001687  0.001577 -0.000881  0.001170
## EEM  0.001629  0.001702  0.003245  0.002188  0.002371 -0.001137  0.001706
## IWM  0.001612  0.001687  0.002188  0.002457  0.001832 -0.001161  0.001533
## EFA  0.001499  0.001577  0.002371  0.001832  0.002319 -0.001045  0.001477
## TLT -0.000859 -0.000881 -0.001137 -0.001161 -0.001045  0.001404 -0.000371
## IYR  0.001143  0.001170  0.001706  0.001533  0.001477 -0.000371  0.001941
## GLD  0.000213  0.000377  0.000852  0.000540  0.000404  0.000112  0.000483
##          GLD
## SPY 0.000213
## QQQ 0.000377
## EEM 0.000852
## IWM 0.000540
## EFA 0.000404
## TLT 0.000112
## IYR 0.000483
## GLD 0.002782
# Question8:Compute global minimum variance portfolio weights 
# Load quadprog package for optimization
library(quadprog)

# Define function to compute GMV weights
compute_gmv_weights <- function(cov_matrix) {
  n <- ncol(cov_matrix)
  Dmat <- cov_matrix
  dvec <- rep(0, n)  # No linear objective
  Amat <- matrix(1, nrow = n)  # Constraint: sum of weights = 1
  bvec <- 1
  result <- solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
  weights <- result$solution
  names(weights) <- colnames(cov_matrix)
  return(weights)
}

# Compute GMV weights
gmv_capm_weights <- compute_gmv_weights(capm_cov_matrix)
gmv_ff3_weights <- compute_gmv_weights(ff3_cov_matrix)

# Display results
cat("GMV Portfolio Weights (CAPM):\n")
## GMV Portfolio Weights (CAPM):
print(round(gmv_capm_weights, 4))
##     SPY     QQQ     EEM     IWM     EFA     TLT     IYR     GLD 
##  0.9709 -0.2010 -0.0157 -0.0547 -0.0106  0.4731 -0.2118  0.0499
cat("\nMV Portfolio Weights (FF3):\n")
## 
## MV Portfolio Weights (FF3):
print(round(gmv_ff3_weights, 4))
##     SPY     QQQ     EEM     IWM     EFA     TLT     IYR     GLD 
##  0.9144 -0.1754 -0.0154 -0.0449 -0.0009  0.4766 -0.2070  0.0526
# Question 9
library(quadprog)
library(ggplot2)
library(dplyr)
library(tibble)
library(purrr)

# Helper function: compute GMV return
get_gmv_return <- function(cov_matrix, next_month_returns) {
  n <- ncol(cov_matrix)
  Dmat <- cov_matrix
  dvec <- rep(0, n)
  Amat <- matrix(1, nrow = n)
  bvec <- 1

  result <- tryCatch(
    solve.QP(Dmat, dvec, Amat, bvec, meq = 1),
    error = function(e) return(rep(NA, n))
  )

  if (all(is.na(result))) return(NA)

  weights <- result$solution
  return(sum(weights * next_month_returns))
}

# Backtest function
gmv_backtest <- function(model = "CAPM") {
  portfolio_returns <- c()
  dates <- c()
  n_obs <- nrow(merged_data)

  for (i in (window_size + 1):(n_obs - 1)) {
    data_window <- merged_data[(i - window_size):(i - 1), ]
    next_month_returns <- merged_data[i, etf_cols] %>% as.numeric()

    if (any(is.na(next_month_returns))) next

    if (model == "FF3") {
      residuals <- map(etf_cols, function(etf) {
        df <- data_window %>%
          select(all_of(c(etf_cols, "Mkt_RF", "SMB", "HML"))) %>%
          mutate(y = data_window[[etf]])
        model_ff <- tryCatch(
          lm(y ~ Mkt_RF + SMB + HML, data = df),
          error = function(e) return(NA)
        )
        if (inherits(model_ff, "lm")) {
          return(resid(model_ff))
        } else {
          return(rep(NA, nrow(df)))
        }
      }) %>% bind_cols()
      colnames(residuals) <- etf_cols
      if (any(is.na(residuals))) next
      cov_matrix <- cov(residuals, use = "complete.obs")
    } else {
      returns_window <- data_window %>%
        mutate(across(all_of(etf_cols), ~ .x - RF)) %>%
        select(all_of(etf_cols)) %>%
        as.matrix()
      cov_matrix <- cov(returns_window, use = "complete.obs")
    }

    if (any(is.na(cov_matrix)) || any(dim(cov_matrix) != c(length(etf_cols), length(etf_cols)))) next

    ret <- get_gmv_return(cov_matrix, next_month_returns)
    portfolio_returns <- c(portfolio_returns, ret)
    dates <- c(dates, merged_data$date[i])
  }

  return(tibble(date = dates, return = portfolio_returns))
}

# Run backtest
window_size <- 60
etf_cols <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")

gmv_capm <- gmv_backtest("CAPM") %>% mutate(model = "CAPM")
gmv_ff3  <- gmv_backtest("FF3")  %>% mutate(model = "FF3")
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
# Combine and visualize
gmv_combined <- bind_rows(gmv_capm, gmv_ff3) %>%
  group_by(model) %>%
  arrange(date) %>%
  mutate(cumulative = cumprod(1 + return))

ggplot(gmv_combined, aes(x = date, y = cumulative, color = model)) +
  geom_line(size = 1.2) +
  labs(title = "📈 GMV Portfolio Backtest: CAPM vs FF3",
       x = "Date", y = "Cumulative Return") +
  theme_minimal() +
  scale_color_manual(values = c("CAPM" = "forestgreen", "FF3" = "steelblue"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Final performance stats
gmv_summary <- gmv_combined %>%
  group_by(model) %>%
  summarise(
    avg_return = mean(return),
    volatility = sd(return),
    sharpe = mean(return) / sd(return),
    final_value = last(cumulative)
  )

print(gmv_summary)
## # A tibble: 2 × 5
##   model avg_return volatility sharpe final_value
##   <chr>      <dbl>      <dbl>  <dbl>       <dbl>
## 1 CAPM     0.00393     0.0262  0.150        1.56
## 2 FF3      0.00373     0.0262  0.142        1.53