Part I:

1: Download ETF Data

tickers <- c("SPY","QQQ","EEM","IWM","EFA","TLT","IYR","GLD")

getSymbols(tickers, from = "2010-01-01", to = "2025-04-30")
## [1] "SPY" "QQQ" "EEM" "IWM" "EFA" "TLT" "IYR" "GLD"

Extract adjusted prices:

prices <- map(tickers, ~ Ad(get(.x))) %>%
  reduce(merge)

colnames(prices) <- tickers

2: Monthly Returns

monthly_prices <- to.monthly(prices, indexAt = "lastof", OHLC = FALSE)

monthly_returns <- data.frame(
  Date = index(monthly_prices),
  coredata(monthly_prices)
) %>%
  arrange(Date) %>%
  mutate(across(-Date, ~ . / lag(.) - 1)) %>%
  drop_na()

head(monthly_returns)
##         Date         SPY         QQQ          EEM         IWM          EFA
## 1 2010-02-28  0.03119479  0.04603867  0.017763985  0.04475136  0.002667621
## 2 2010-03-31  0.06087947  0.07710906  0.081108567  0.08230683  0.063853969
## 3 2010-04-30  0.01547016  0.02242518 -0.001661558  0.05678475 -0.028045561
## 4 2010-05-31 -0.07945456 -0.07392382 -0.093935667 -0.07536681 -0.111928230
## 5 2010-06-30 -0.05174091 -0.05975678 -0.013986846 -0.07743395 -0.020619293
## 6 2010-07-31  0.06830056  0.07258261  0.109324567  0.06730920  0.116104008
##            TLT         IYR          GLD
## 1 -0.003424699  0.05457048  0.032748219
## 2 -0.020572105  0.09748515 -0.004386396
## 3  0.033217315  0.06388081  0.058834363
## 4  0.051083516 -0.05683535  0.030513147
## 5  0.057978631 -0.04670103  0.023553189
## 6 -0.009464227  0.09404763 -0.050871157

3: Fama-French 3 Factors

url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"

temp <- tempfile()
download.file(url, temp)

ff_raw <- read_csv(unz(temp, "F-F_Research_Data_Factors.csv"), skip = 3)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 1298 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ...1
## dbl (4): Mkt-RF, SMB, HML, RF
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Clean + align to END OF MONTH:

ff <- ff_raw %>%
  rename(Date = ...1) %>%
  filter(!is.na(Date)) %>%
  filter(Date >= 201001 & Date <= 202504) %>%
  mutate(across(-Date, as.numeric)) %>%
  mutate(across(-Date, ~ . / 100)) %>%
  mutate(Date = ymd(paste0(Date, "01"))) %>%
  mutate(Date = ceiling_date(Date, "month") - days(1))  # 🔥 CRITICAL
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Date = ymd(paste0(Date, "01"))`.
## Caused by warning:
## !  13 failed to parse.
head(ff)
## # A tibble: 6 × 5
##   Date       `Mkt-RF`     SMB     HML     RF
##   <date>        <dbl>   <dbl>   <dbl>  <dbl>
## 1 2010-01-31  -0.0335  0.0043  0.0033 0     
## 2 2010-02-28   0.0339  0.0118  0.0318 0     
## 3 2010-03-31   0.063   0.0146  0.0219 0.0001
## 4 2010-04-30   0.0199  0.0484  0.0296 0.0001
## 5 2010-05-31  -0.079   0.0013 -0.0248 0.0001
## 6 2010-06-30  -0.0556 -0.0179 -0.0473 0.0001

4: Merge Data

data_all <- left_join(monthly_returns, ff, by = "Date") %>%
  drop_na()

head(data_all)
##         Date         SPY         QQQ          EEM         IWM          EFA
## 1 2010-02-28  0.03119479  0.04603867  0.017763985  0.04475136  0.002667621
## 2 2010-03-31  0.06087947  0.07710906  0.081108567  0.08230683  0.063853969
## 3 2010-04-30  0.01547016  0.02242518 -0.001661558  0.05678475 -0.028045561
## 4 2010-05-31 -0.07945456 -0.07392382 -0.093935667 -0.07536681 -0.111928230
## 5 2010-06-30 -0.05174091 -0.05975678 -0.013986846 -0.07743395 -0.020619293
## 6 2010-07-31  0.06830056  0.07258261  0.109324567  0.06730920  0.116104008
##            TLT         IYR          GLD  Mkt-RF     SMB     HML    RF
## 1 -0.003424699  0.05457048  0.032748219  0.0339  0.0118  0.0318 0e+00
## 2 -0.020572105  0.09748515 -0.004386396  0.0630  0.0146  0.0219 1e-04
## 3  0.033217315  0.06388081  0.058834363  0.0199  0.0484  0.0296 1e-04
## 4  0.051083516 -0.05683535  0.030513147 -0.0790  0.0013 -0.0248 1e-04
## 5  0.057978631 -0.04670103  0.023553189 -0.0556 -0.0179 -0.0473 1e-04
## 6 -0.009464227  0.09404763 -0.050871157  0.0692  0.0022 -0.0050 1e-04

5: MVP (Sample Covariance)

window_data <- data_all %>%
  filter(Date >= as.Date("2020-03-31") & Date <= as.Date("2025-02-28"))

nrow(window_data)   # should be ~60
## [1] 62
R <- as.matrix(window_data[, tickers])

Sigma <- cov(R)

one <- rep(1, ncol(R))

w_mvp <- solve(Sigma) %*% one / as.numeric(t(one) %*% solve(Sigma) %*% one)

w_mvp
##            [,1]
## SPY  1.50933140
## QQQ -0.72082250
## EEM  0.08843009
## IWM  0.01255989
## EFA -0.20232062
## TLT  0.52190481
## IYR -0.52336297
## GLD  0.31427990

6: MVP using FF 3-Factor Model

Estimate Betas Safely

betas <- map_dfr(tickers, function(tk) {

  df <- window_data %>%
    select(all_of(tk), `Mkt-RF`, SMB, HML) %>%
    drop_na()

  model <- lm(df[[1]] ~ ., data = df[, -1])

  tibble(asset = tk, t(coef(model)[-1]))
})

betas
## # A tibble: 8 × 2
##   asset `t(coef(model)[-1])`[,1]    [,2]    [,3]
##   <chr>                    <dbl>   <dbl>   <dbl>
## 1 SPY                      0.890 -0.151  -0.118 
## 2 QQQ                      1.01  -0.0883 -0.497 
## 3 EEM                      0.654  0.0176  0.0532
## 4 IWM                      0.963  0.734   0.0644
## 5 EFA                      0.778 -0.179   0.0604
## 6 TLT                      0.316 -0.0511 -0.286 
## 7 IYR                      0.883  0.0433  0.0461
## 8 GLD                      0.250 -0.288   0.0365

Factor Covariance

Sigma_f <- cov(window_data %>% select(`Mkt-RF`, SMB, HML))

Asset Covariance via FF Model

B <- as.matrix(betas[, -1])

Sigma_ff <- B %*% Sigma_f %*% t(B)

# Regularization (prevents singular matrix)
Sigma_ff <- Sigma_ff + diag(1e-6, nrow(Sigma_ff))

Compute MVP Weights

one <- rep(1, ncol(Sigma_ff))

w_mvp_ff <- solve(Sigma_ff) %*% one / as.numeric(t(one) %*% solve(Sigma_ff) %*% one)

w_mvp_ff
##            [,1]
## [1,] -0.2301041
## [2,] -0.3050892
## [3,]  0.2070706
## [4,]  0.1900895
## [5,] -0.1103467
## [6,]  0.7426309
## [7,] -0.1263138
## [8,]  0.6320629

7: Realized Return (March 2025)

march <- data_all %>% filter(Date == as.Date("2025-03-31"))

r_march <- as.numeric(march[, tickers])

ret_capm <- sum(w_mvp * r_march)
ret_ff   <- sum(w_mvp_ff * r_march)

ret_capm
## [1] 0.005992885
ret_ff
## [1] 0.07879768

8: April 2025 (Rolling Window)

window_april <- data_all %>%
  filter(Date >= as.Date("2020-04-30") & Date <= as.Date("2025-03-31"))

R2 <- as.matrix(window_april[, tickers])

Sigma2 <- cov(R2)

one <- rep(1, ncol(R2))

w_mvp2 <- solve(Sigma2) %*% one / as.numeric(t(one) %*% solve(Sigma2) %*% one)
april <- data_all %>% filter(Date == as.Date("2025-04-30"))

r_april <- as.numeric(april[, tickers])

ret_april <- sum(w_mvp2 * r_april)

ret_april
## [1] -0.001650694

Part II: Textbook Problem Sets

📘 Chapter 5 – French Data Analysis

Load Data

# Skip metadata rows (French files usually need this)
data_raw <- read_csv("6_Portfolios_2x3.csv", skip = 11)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 8899 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): The  portfolios  include utilities and include financials.
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data_raw)
## # A tibble: 6 × 1
##   `The  portfolios  include utilities and include financials.`      
##   <chr>                                                             
## 1 Average Value Weighted Returns -- Monthly                         
## 2 ,SMALL LoBM,ME1 BM2,SMALL HiBM,BIG LoBM,ME2 BM2,BIG HiBM          
## 3 192607,   1.0866,   0.8807,  -0.1275,   5.5746,   1.9060,   2.0068
## 4 192608,   0.7831,   1.4677,   5.4422,   2.7268,   2.7028,   5.6834
## 5 192609,  -2.8045,  -0.0599,  -0.4399,   1.4777,   0.0954,  -0.7872
## 6 192610,  -4.0289,  -4.3615,  -2.0128,  -3.6327,  -2.3451,  -4.0040

Clean Data (ROBUST)

# Rename first column
colnames(data_raw)[1] <- "Date"

# Remove footer rows (non-numeric dates)
data <- data_raw %>%
  filter(!is.na(as.numeric(Date)))
## Warning: There was 1 warning in `filter()`.
## ℹ In argument: `!is.na(as.numeric(Date))`.
## Caused by warning:
## ! NAs introduced by coercion
# Convert Date
data$Date <- as.Date(paste0(data$Date, "01"), format="%Y%m%d")

# Convert everything else safely
data[,-1] <- data[,-1] %>%
  mutate(across(everything(), ~as.numeric(.)))

# Drop NA
data <- na.omit(data)

str(data)
## tibble [0 × 1] (S3: tbl_df/tbl/data.frame)
##  $ Date: 'Date' num(0)

Split Data

midpoint <- floor(nrow(data) / 2)

data_1 <- data[1:midpoint, ]
data_2 <- data[(midpoint + 1):nrow(data), ]

Compute Statistics

compute_stats <- function(df) {
  df %>%
    summarise(across(-Date, list(
      mean = ~mean(.),
      sd = ~sd(.),
      skew = ~moments::skewness(.),
      kurt = ~moments::kurtosis(.)
    ), .names = "{col}_{fn}"))
}

Apply Function

stats_1 <- compute_stats(data_1)
stats_2 <- compute_stats(data_2)

stats_1
## # A tibble: 1 × 0
stats_2
## # A tibble: 1 × 0

Compare

comparison <- bind_rows(
  Period1 = stats_1,
  Period2 = stats_2,
  .id = "Period"
)

comparison
## # A tibble: 2 × 1
##   Period 
##   <chr>  
## 1 Period1
## 2 Period2

Visualization

# Make sure there are columns to pivot
if(ncol(data) > 1){
  
  data_long <- data %>%
    pivot_longer(cols = -Date,
                 names_to = "Portfolio",
                 values_to = "Return")

  ggplot(data_long, aes(x = Return, fill = Portfolio)) +
    geom_histogram(bins = 50, alpha = 0.6) +
    facet_wrap(~Portfolio, scales = "free") +
    theme_minimal()

} else {
  print("Data format issue: no columns available for plotting.")
}
## [1] "Data format issue: no columns available for plotting."

Interpretation

The summary statistics differ across the two periods in mean, volatility, and higher moments. This suggests the return distributions are not stable over time, indicating the two subsamples likely do not come from the same distribution.


📘 Chapter 6 – Q21

Er_p <- 0.11
sigma_p <- 0.15
rf <- 0.05
target_return <- 0.08

y <- (target_return - rf) / (Er_p - rf)
sigma_c <- y * sigma_p
y2 <- 0.12 / sigma_p

y
## [1] 0.5
sigma_c
## [1] 0.075
y2
## [1] 0.8

📘 Chapter 6 – Q22

Er_m <- 0.12
sigma_m <- 0.20
rf <- 0.05

y <- 0.10 / sigma_m
Er_c <- rf + y * (Er_m - rf)

y
## [1] 0.5
Er_c
## [1] 0.085

📘 Chapter 7 – Q8

risk_premium <- 0.10
rf <- 0.06
sigma_p <- 0.14

w_risky <- 0.6
w_rf <- 0.4

Er_p <- rf + risk_premium
Er_c <- w_risky * Er_p + w_rf * rf
sigma_c <- w_risky * sigma_p

Er_p
## [1] 0.16
Er_c
## [1] 0.12
sigma_c
## [1] 0.084

📘 Chapter 7 – Q12

Er_A <- 0.10
sigma_A <- 0.05

Er_B <- 0.15
sigma_B <- 0.10

w_A <- sigma_B / (sigma_A + sigma_B)
w_B <- 1 - w_A

rf <- w_A * Er_A + w_B * Er_B

w_A
## [1] 0.6666667
w_B
## [1] 0.3333333
rf
## [1] 0.1166667

📘 Chapter 8 – Q12

w_old <- 0.9
w_abc <- 0.1

Er_old <- 0.0067
Er_abc <- 0.0125

sigma_old <- 0.0237
sigma_abc <- 0.0295

rho <- 0.4

Er_new <- w_old * Er_old + w_abc * Er_abc
cov_ab <- rho * sigma_old * sigma_abc

var_new <- (w_old^2 * sigma_old^2) +
           (w_abc^2 * sigma_abc^2) +
           (2 * w_old * w_abc * cov_ab)

sigma_new <- sqrt(var_new)

Er_new
## [1] 0.00728
sigma_new
## [1] 0.02267179

📘 Chapter 8 – T-Bill

rf <- 0.0042

Er_tbill <- w_old * Er_old + w_abc * rf
sigma_tbill <- w_old * sigma_old

Er_tbill
## [1] 0.00645
sigma_tbill
## [1] 0.02133

📘 Chapter 8 – Q17

stocks <- data.frame(
  name = c("A","B","C"),
  Eri = c(0.12, 0.10, 0.14),
  beta = c(1.2, 0.8, 1.5),
  residual_var = c(0.04, 0.03, 0.05)
)

rf <- 0.05
Er_m <- 0.12

stocks$alpha <- stocks$Eri - (rf + stocks$beta * (Er_m - rf))
stocks$alpha_ratio <- stocks$alpha / stocks$residual_var

stocks <- stocks[order(-stocks$alpha_ratio), ]

stocks
##   name  Eri beta residual_var  alpha alpha_ratio
## 2    B 0.10  0.8         0.03 -0.006       -0.20
## 3    C 0.14  1.5         0.05 -0.015       -0.30
## 1    A 0.12  1.2         0.04 -0.014       -0.35