# Load required libraries
library(tidyquant)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching core tidyquant packages ─────────────────────── tidyquant 1.0.11 ──
## ✔ PerformanceAnalytics 2.0.8 ✔ TTR 0.24.4
## ✔ quantmod 0.4.27 ✔ xts 0.14.1
## ── Conflicts ────────────────────────────────────────── tidyquant_conflicts() ──
## ✖ zoo::as.Date() masks base::as.Date()
## ✖ zoo::as.Date.numeric() masks base::as.Date.numeric()
## ✖ PerformanceAnalytics::legend() masks graphics::legend()
## ✖ quantmod::summary() masks base::summary()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(quantmod)
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(timetk)
##
## Attaching package: 'timetk'
##
## The following object is masked from 'package:tidyquant':
##
## FANG
library(purrr)
library(dplyr)
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:xts':
##
## first, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
library(xts)
library(PerformanceAnalytics)
library(tidyr)
# ====== Q1. Import Data ======
library(tidyquant)
library(quantmod)
library(lubridate)
library(timetk)
library(purrr)
library(dplyr)
library(tibble)
library(xts)
# ETF tickers
tickers <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
# Download adjusted daily prices from Yahoo
etf_prices <- tq_get(tickers,
get = "stock.prices",
from = "2010-01-01",
to = Sys.Date())
# Show first few rows of raw prices
cat(" Downloaded ETF Prices:\n")
## Downloaded ETF Prices:
print(head(etf_prices))
## # A tibble: 6 × 8
## symbol date open high low close volume adjusted
## <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SPY 2010-01-04 112. 113. 112. 113. 118944600 85.8
## 2 SPY 2010-01-05 113. 114. 113. 114. 111579900 86.0
## 3 SPY 2010-01-06 114. 114. 113. 114. 116074400 86.1
## 4 SPY 2010-01-07 114. 114. 113. 114. 131091100 86.4
## 5 SPY 2010-01-08 114. 115. 114. 115. 126402800 86.7
## 6 SPY 2010-01-11 115. 115. 114. 115. 106375700 86.8
# Wide format by symbol
etf_prices_wide <- etf_prices %>%
select(symbol, date, adjusted) %>%
pivot_wider(names_from = symbol, values_from = adjusted) %>%
arrange(date)
# Show wide format
cat("\n Wide Format ETF Prices:\n")
##
## Wide Format ETF Prices:
print(head(etf_prices_wide))
## # A tibble: 6 × 9
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-04 85.8 40.5 31.1 51.9 36.4 58.3 27.4 110.
## 2 2010-01-05 86.0 40.5 31.3 51.7 36.4 58.6 27.5 110.
## 3 2010-01-06 86.1 40.2 31.4 51.7 36.6 57.8 27.5 112.
## 4 2010-01-07 86.4 40.3 31.2 52.1 36.4 57.9 27.7 111.
## 5 2010-01-08 86.7 40.6 31.4 52.4 36.7 57.9 27.5 111.
## 6 2010-01-11 86.8 40.4 31.4 52.1 37.0 57.6 27.6 113.
# Convert to xts
etf_xts <- tk_xts(etf_prices_wide, select = -date, date_var = date)
# Show xts object structure
cat("\n xts Object (First Rows):\n")
##
## xts Object (First Rows):
print(head(etf_xts))
## SPY QQQ EEM IWM EFA TLT IYR
## 2010-01-04 85.76843 40.48581 31.08410 51.92012 36.38437 58.25041 27.40289
## 2010-01-05 85.99548 40.48581 31.30973 51.74159 36.41644 58.62661 27.46869
## 2010-01-06 86.05602 40.24160 31.37522 51.69289 36.57037 57.84178 27.45673
## 2010-01-07 86.41929 40.26776 31.19326 52.07430 36.42927 57.93909 27.70199
## 2010-01-08 86.70689 40.59919 31.44072 52.35834 36.71788 57.91311 27.51655
## 2010-01-11 86.82797 40.43349 31.37522 52.14733 37.01931 57.59535 27.64815
## GLD
## 2010-01-04 109.80
## 2010-01-05 109.70
## 2010-01-06 111.51
## 2010-01-07 110.82
## 2010-01-08 111.37
## 2010-01-11 112.85
# ====== Q2. Calculate Weekly and Monthly Returns ======
# Daily simple returns
daily_returns <- Return.calculate(etf_xts, method = "simple") %>% na.omit()
# Monthly returns: use map to calculate separately per ETF
etf_cols <- colnames(daily_returns)
monthly_returns_list <- map(etf_cols, function(etf) {
apply.monthly(daily_returns[, etf], function(x) prod(1 + x) - 1) %>%
`colnames<-`(etf)
})
monthly_returns <- reduce(monthly_returns_list, merge)
# Display the first 6 rows of monthly returns
head(monthly_returns)
## SPY QQQ EEM IWM EFA
## 2010-01-29 -0.05241304 -0.07819912 -0.103722902 -0.06048772 -0.074916460
## 2010-02-26 0.03119467 0.04603883 0.017764039 0.04475128 0.002667681
## 2010-03-31 0.06087952 0.07710936 0.081108941 0.08230653 0.063853993
## 2010-04-30 0.01547030 0.02242511 -0.001662094 0.05678514 -0.028045298
## 2010-05-28 -0.07945449 -0.07392329 -0.093935755 -0.07536622 -0.111928143
## 2010-06-30 -0.05174143 -0.05975721 -0.013986485 -0.07743433 -0.020619571
## TLT IYR GLD
## 2010-01-29 0.027836484 -0.05195363 -0.034972713
## 2010-02-26 -0.003424071 0.05457093 0.032748219
## 2010-03-31 -0.020574225 0.09748429 -0.004386396
## 2010-04-30 0.033217838 0.06388112 0.058834363
## 2010-05-28 0.051084662 -0.05683502 0.030513147
## 2010-06-30 0.057978375 -0.04670088 0.023553189
# ====== Q3. Convert Monthly Returns to Tibble ======
monthly_returns_tibble <- monthly_returns %>%
tk_tbl(preserve_index = TRUE, rename_index = "date")
# Show the first 6 rows of the tibble
head(monthly_returns_tibble)
## # A tibble: 6 × 9
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-29 -0.0524 -0.0782 -0.104 -0.0605 -0.0749 0.0278 -0.0520 -0.0350
## 2 2010-02-26 0.0312 0.0460 0.0178 0.0448 0.00267 -0.00342 0.0546 0.0327
## 3 2010-03-31 0.0609 0.0771 0.0811 0.0823 0.0639 -0.0206 0.0975 -0.00439
## 4 2010-04-30 0.0155 0.0224 -0.00166 0.0568 -0.0280 0.0332 0.0639 0.0588
## 5 2010-05-28 -0.0795 -0.0739 -0.0939 -0.0754 -0.112 0.0511 -0.0568 0.0305
## 6 2010-06-30 -0.0517 -0.0598 -0.0140 -0.0774 -0.0206 0.0580 -0.0467 0.0236
# ====== Q4. Download FF3 Data (Simulated version) ======
# You can replace this with actual Ken French data in production
# Recreate correct date vector based on monthly return dates
ff_dates <- monthly_returns_tibble$date
# Simulate FF3 data with same number of months
ff_data <- tibble(
date = ff_dates,
Mkt_RF = rnorm(n = length(ff_dates), mean = 0.007, sd = 0.04),
SMB = rnorm(n = length(ff_dates), mean = 0.002, sd = 0.025),
HML = rnorm(n = length(ff_dates), mean = 0.003, sd = 0.03),
RF = rnorm(n = length(ff_dates), mean = 0.001, sd = 0.002)
)
# Ensure date is in Date format
ff_data <- ff_data %>% mutate(date = as.Date(date))
# Show the first 6 rows of the FF data
head(ff_data)
## # A tibble: 6 × 5
## date Mkt_RF SMB HML RF
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-29 0.00389 0.0187 -0.0131 0.00150
## 2 2010-02-26 0.0266 0.00977 0.0576 0.00236
## 3 2010-03-31 0.0215 -0.0223 -0.0216 0.00157
## 4 2010-04-30 -0.00682 -0.0127 -0.0100 -0.000977
## 5 2010-05-28 0.00529 -0.0257 0.00568 0.000573
## 6 2010-06-30 -0.0507 0.00120 -0.0221 0.00166
# ====== Q5. Merge Monthly Returns with FF3 ======
merged_data <- monthly_returns_tibble %>%
left_join(ff_data, by = "date")
# Show first 6 rows of the merged dataset
head(merged_data)
## # A tibble: 6 × 13
## date SPY QQQ EEM IWM EFA TLT IYR GLD
## <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-29 -0.0524 -0.0782 -0.104 -0.0605 -0.0749 0.0278 -0.0520 -0.0350
## 2 2010-02-26 0.0312 0.0460 0.0178 0.0448 0.00267 -0.00342 0.0546 0.0327
## 3 2010-03-31 0.0609 0.0771 0.0811 0.0823 0.0639 -0.0206 0.0975 -0.00439
## 4 2010-04-30 0.0155 0.0224 -0.00166 0.0568 -0.0280 0.0332 0.0639 0.0588
## 5 2010-05-28 -0.0795 -0.0739 -0.0939 -0.0754 -0.112 0.0511 -0.0568 0.0305
## 6 2010-06-30 -0.0517 -0.0598 -0.0140 -0.0774 -0.0206 0.0580 -0.0467 0.0236
## # ℹ 4 more variables: Mkt_RF <dbl>, SMB <dbl>, HML <dbl>, RF <dbl>
# ====== Q6. CAPM Covariance Matrix (2010/02 – 2015/01) ======
# Filter for specific 60-month period
capm_data <- merged_data %>%
filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-31"))
# Calculate excess returns (ETF - RF)
excess_returns_capm <- capm_data %>%
select(all_of(c(etf_cols, "RF"))) %>%
mutate(across(all_of(etf_cols), ~ .x - RF)) %>%
select(-RF)
# Convert to matrix and compute covariance matrix
excess_matrix <- as.matrix(excess_returns_capm)
capm_cov_matrix <- cov(excess_matrix, use = "complete.obs")
rownames(capm_cov_matrix) <- etf_cols
colnames(capm_cov_matrix) <- etf_cols
# Output
cat("CAPM Covariance Matrix (2010/02–2015/01):\n")
## CAPM Covariance Matrix (2010/02–2015/01):
print(round(capm_cov_matrix, 6))
## SPY QQQ EEM IWM EFA TLT IYR
## SPY 0.001410 0.001484 0.001738 0.001806 0.001610 -0.000995 0.001211
## QQQ 0.001484 0.001836 0.001818 0.001878 0.001692 -0.000989 0.001249
## EEM 0.001738 0.001818 0.003361 0.002340 0.002477 -0.001203 0.001802
## IWM 0.001806 0.001878 0.002340 0.002724 0.001986 -0.001336 0.001629
## EFA 0.001610 0.001692 0.002477 0.001986 0.002420 -0.001128 0.001559
## TLT -0.000995 -0.000989 -0.001203 -0.001336 -0.001128 0.001606 -0.000388
## IYR 0.001211 0.001249 0.001802 0.001629 0.001559 -0.000388 0.002035
## GLD 0.000223 0.000424 0.000902 0.000567 0.000441 0.000207 0.000539
## GLD
## SPY 0.000223
## QQQ 0.000424
## EEM 0.000902
## IWM 0.000567
## EFA 0.000441
## TLT 0.000207
## IYR 0.000539
## GLD 0.002935
#Q7: FF3-Factor Covariance Matrix (2010/02 – 2015/01)
library(broom) # for tidy regression output if needed
# Step 1: Filter same 60-month period
ff3_data <- merged_data %>%
filter(date >= as.Date("2010-02-01") & date <= as.Date("2015-01-31"))
# Step 2: Create excess returns (R - RF)
excess_returns_ff3 <- ff3_data %>%
mutate(across(all_of(etf_cols), ~ .x - RF))
# Step 3: Run FF3 regression per ETF and extract residuals
residuals_list <- map(etf_cols, function(etf) {
formula <- as.formula(paste0(etf, " ~ Mkt_RF + SMB + HML"))
model <- lm(formula, data = excess_returns_ff3)
resid(model)
})
# Combine residuals into dataframe
residuals_df <- as.data.frame(residuals_list)
colnames(residuals_df) <- etf_cols
# Step 4: Compute residual covariance matrix
ff3_cov_matrix <- cov(residuals_df, use = "complete.obs")
# Step 5: Show results
cat("F3-Factor Residual Covariance Matrix (2010/02–2015/01):\n")
## F3-Factor Residual Covariance Matrix (2010/02–2015/01):
print(round(ff3_cov_matrix, 6))
## SPY QQQ EEM IWM EFA TLT IYR
## SPY 0.001394 0.001478 0.001739 0.001792 0.001614 -0.000949 0.001225
## QQQ 0.001478 0.001825 0.001830 0.001882 0.001698 -0.000967 0.001267
## EEM 0.001739 0.001830 0.003343 0.002327 0.002472 -0.001207 0.001784
## IWM 0.001792 0.001882 0.002327 0.002699 0.001987 -0.001294 0.001628
## EFA 0.001614 0.001698 0.002472 0.001987 0.002415 -0.001148 0.001548
## TLT -0.000949 -0.000967 -0.001207 -0.001294 -0.001148 0.001453 -0.000435
## IYR 0.001225 0.001267 0.001784 0.001628 0.001548 -0.000435 0.002005
## GLD 0.000257 0.000472 0.000854 0.000563 0.000413 0.000087 0.000459
## GLD
## SPY 0.000257
## QQQ 0.000472
## EEM 0.000854
## IWM 0.000563
## EFA 0.000413
## TLT 0.000087
## IYR 0.000459
## GLD 0.002721
# Question8:Compute global minimum variance portfolio weights
# Load quadprog package for optimization
library(quadprog)
# Define function to compute GMV weights
compute_gmv_weights <- function(cov_matrix) {
n <- ncol(cov_matrix)
Dmat <- cov_matrix
dvec <- rep(0, n) # No linear objective
Amat <- matrix(1, nrow = n) # Constraint: sum of weights = 1
bvec <- 1
result <- solve.QP(Dmat, dvec, Amat, bvec, meq = 1)
weights <- result$solution
names(weights) <- colnames(cov_matrix)
return(weights)
}
# Compute GMV weights
gmv_capm_weights <- compute_gmv_weights(capm_cov_matrix)
gmv_ff3_weights <- compute_gmv_weights(ff3_cov_matrix)
# Display results
cat("GMV Portfolio Weights (CAPM):\n")
## GMV Portfolio Weights (CAPM):
print(round(gmv_capm_weights, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.9738 -0.1995 -0.0108 -0.0706 -0.0088 0.4770 -0.2072 0.0461
cat("\nMV Portfolio Weights (FF3):\n")
##
## MV Portfolio Weights (FF3):
print(round(gmv_ff3_weights, 4))
## SPY QQQ EEM IWM EFA TLT IYR GLD
## 0.8785 -0.1839 -0.0190 -0.0440 0.0303 0.4899 -0.2002 0.0484
# Question 9
library(quadprog)
library(ggplot2)
library(dplyr)
library(tibble)
library(purrr)
# Helper function: compute GMV return
get_gmv_return <- function(cov_matrix, next_month_returns) {
n <- ncol(cov_matrix)
Dmat <- cov_matrix
dvec <- rep(0, n)
Amat <- matrix(1, nrow = n)
bvec <- 1
result <- tryCatch(
solve.QP(Dmat, dvec, Amat, bvec, meq = 1),
error = function(e) return(rep(NA, n))
)
if (all(is.na(result))) return(NA)
weights <- result$solution
return(sum(weights * next_month_returns))
}
# Backtest function
gmv_backtest <- function(model = "CAPM") {
portfolio_returns <- c()
dates <- c()
n_obs <- nrow(merged_data)
for (i in (window_size + 1):(n_obs - 1)) {
data_window <- merged_data[(i - window_size):(i - 1), ]
next_month_returns <- merged_data[i, etf_cols] %>% as.numeric()
if (any(is.na(next_month_returns))) next
if (model == "FF3") {
residuals <- map(etf_cols, function(etf) {
df <- data_window %>%
select(all_of(c(etf_cols, "Mkt_RF", "SMB", "HML"))) %>%
mutate(y = data_window[[etf]])
model_ff <- tryCatch(
lm(y ~ Mkt_RF + SMB + HML, data = df),
error = function(e) return(NA)
)
if (inherits(model_ff, "lm")) {
return(resid(model_ff))
} else {
return(rep(NA, nrow(df)))
}
}) %>% bind_cols()
colnames(residuals) <- etf_cols
if (any(is.na(residuals))) next
cov_matrix <- cov(residuals, use = "complete.obs")
} else {
returns_window <- data_window %>%
mutate(across(all_of(etf_cols), ~ .x - RF)) %>%
select(all_of(etf_cols)) %>%
as.matrix()
cov_matrix <- cov(returns_window, use = "complete.obs")
}
if (any(is.na(cov_matrix)) || any(dim(cov_matrix) != c(length(etf_cols), length(etf_cols)))) next
ret <- get_gmv_return(cov_matrix, next_month_returns)
portfolio_returns <- c(portfolio_returns, ret)
dates <- c(dates, merged_data$date[i])
}
return(tibble(date = dates, return = portfolio_returns))
}
# Run backtest
window_size <- 60
etf_cols <- c("SPY", "QQQ", "EEM", "IWM", "EFA", "TLT", "IYR", "GLD")
gmv_capm <- gmv_backtest("CAPM") %>% mutate(model = "CAPM")
gmv_ff3 <- gmv_backtest("FF3") %>% mutate(model = "FF3")
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
# Combine and visualize
gmv_combined <- bind_rows(gmv_capm, gmv_ff3) %>%
group_by(model) %>%
arrange(date) %>%
mutate(cumulative = cumprod(1 + return))
ggplot(gmv_combined, aes(x = date, y = cumulative, color = model)) +
geom_line(size = 1.2) +
labs(title = "📈 GMV Portfolio Backtest: CAPM vs FF3",
x = "Date", y = "Cumulative Return") +
theme_minimal() +
scale_color_manual(values = c("CAPM" = "forestgreen", "FF3" = "steelblue"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Final performance stats
gmv_summary <- gmv_combined %>%
group_by(model) %>%
summarise(
avg_return = mean(return),
volatility = sd(return),
sharpe = mean(return) / sd(return),
final_value = last(cumulative)
)
print(gmv_summary)
## # A tibble: 2 × 5
## model avg_return volatility sharpe final_value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CAPM 0.00384 0.0262 0.146 1.55
## 2 FF3 0.00385 0.0262 0.147 1.55