tickers <- c("SPY","QQQ","EEM","IWM","EFA","TLT","IYR","GLD")
getSymbols(tickers, from = "2010-01-01", to = "2025-04-30")
## [1] "SPY" "QQQ" "EEM" "IWM" "EFA" "TLT" "IYR" "GLD"
Extract adjusted prices:
prices <- map(tickers, ~ Ad(get(.x))) %>%
reduce(merge)
colnames(prices) <- tickers
monthly_prices <- to.monthly(prices, indexAt = "lastof", OHLC = FALSE)
monthly_returns <- data.frame(
Date = index(monthly_prices),
coredata(monthly_prices)
) %>%
arrange(Date) %>%
mutate(across(-Date, ~ . / lag(.) - 1)) %>%
drop_na()
head(monthly_returns)
## Date SPY QQQ EEM IWM EFA
## 1 2010-02-28 0.03119479 0.04603867 0.017763985 0.04475136 0.002667621
## 2 2010-03-31 0.06087947 0.07710906 0.081108567 0.08230683 0.063853969
## 3 2010-04-30 0.01547016 0.02242518 -0.001661558 0.05678475 -0.028045561
## 4 2010-05-31 -0.07945456 -0.07392382 -0.093935667 -0.07536681 -0.111928230
## 5 2010-06-30 -0.05174091 -0.05975678 -0.013986846 -0.07743395 -0.020619293
## 6 2010-07-31 0.06830056 0.07258261 0.109324567 0.06730920 0.116104008
## TLT IYR GLD
## 1 -0.003424699 0.05457048 0.032748219
## 2 -0.020572105 0.09748515 -0.004386396
## 3 0.033217315 0.06388081 0.058834363
## 4 0.051083516 -0.05683535 0.030513147
## 5 0.057978631 -0.04670103 0.023553189
## 6 -0.009464227 0.09404763 -0.050871157
url <- "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip"
temp <- tempfile()
download.file(url, temp)
ff_raw <- read_csv(unz(temp, "F-F_Research_Data_Factors.csv"), skip = 3)
## New names:
## • `` -> `...1`
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 1298 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): ...1
## dbl (4): Mkt-RF, SMB, HML, RF
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Clean + align to END OF MONTH:
ff <- ff_raw %>%
rename(Date = ...1) %>%
filter(!is.na(Date)) %>%
filter(Date >= 201001 & Date <= 202504) %>%
mutate(across(-Date, as.numeric)) %>%
mutate(across(-Date, ~ . / 100)) %>%
mutate(Date = ymd(paste0(Date, "01"))) %>%
mutate(Date = ceiling_date(Date, "month") - days(1)) # 🔥 CRITICAL
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Date = ymd(paste0(Date, "01"))`.
## Caused by warning:
## ! 13 failed to parse.
head(ff)
## # A tibble: 6 × 5
## Date `Mkt-RF` SMB HML RF
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2010-01-31 -0.0335 0.0043 0.0033 0
## 2 2010-02-28 0.0339 0.0118 0.0318 0
## 3 2010-03-31 0.063 0.0146 0.0219 0.0001
## 4 2010-04-30 0.0199 0.0484 0.0296 0.0001
## 5 2010-05-31 -0.079 0.0013 -0.0248 0.0001
## 6 2010-06-30 -0.0556 -0.0179 -0.0473 0.0001
data_all <- left_join(monthly_returns, ff, by = "Date") %>%
drop_na()
head(data_all)
## Date SPY QQQ EEM IWM EFA
## 1 2010-02-28 0.03119479 0.04603867 0.017763985 0.04475136 0.002667621
## 2 2010-03-31 0.06087947 0.07710906 0.081108567 0.08230683 0.063853969
## 3 2010-04-30 0.01547016 0.02242518 -0.001661558 0.05678475 -0.028045561
## 4 2010-05-31 -0.07945456 -0.07392382 -0.093935667 -0.07536681 -0.111928230
## 5 2010-06-30 -0.05174091 -0.05975678 -0.013986846 -0.07743395 -0.020619293
## 6 2010-07-31 0.06830056 0.07258261 0.109324567 0.06730920 0.116104008
## TLT IYR GLD Mkt-RF SMB HML RF
## 1 -0.003424699 0.05457048 0.032748219 0.0339 0.0118 0.0318 0e+00
## 2 -0.020572105 0.09748515 -0.004386396 0.0630 0.0146 0.0219 1e-04
## 3 0.033217315 0.06388081 0.058834363 0.0199 0.0484 0.0296 1e-04
## 4 0.051083516 -0.05683535 0.030513147 -0.0790 0.0013 -0.0248 1e-04
## 5 0.057978631 -0.04670103 0.023553189 -0.0556 -0.0179 -0.0473 1e-04
## 6 -0.009464227 0.09404763 -0.050871157 0.0692 0.0022 -0.0050 1e-04
window_data <- data_all %>%
filter(Date >= as.Date("2020-03-31") & Date <= as.Date("2025-02-28"))
nrow(window_data) # should be ~60
## [1] 62
R <- as.matrix(window_data[, tickers])
Sigma <- cov(R)
one <- rep(1, ncol(R))
w_mvp <- solve(Sigma) %*% one / as.numeric(t(one) %*% solve(Sigma) %*% one)
w_mvp
## [,1]
## SPY 1.50933140
## QQQ -0.72082250
## EEM 0.08843009
## IWM 0.01255989
## EFA -0.20232062
## TLT 0.52190481
## IYR -0.52336297
## GLD 0.31427990
betas <- map_dfr(tickers, function(tk) {
df <- window_data %>%
select(all_of(tk), `Mkt-RF`, SMB, HML) %>%
drop_na()
model <- lm(df[[1]] ~ ., data = df[, -1])
tibble(asset = tk, t(coef(model)[-1]))
})
betas
## # A tibble: 8 × 2
## asset `t(coef(model)[-1])`[,1] [,2] [,3]
## <chr> <dbl> <dbl> <dbl>
## 1 SPY 0.890 -0.151 -0.118
## 2 QQQ 1.01 -0.0883 -0.497
## 3 EEM 0.654 0.0176 0.0532
## 4 IWM 0.963 0.734 0.0644
## 5 EFA 0.778 -0.179 0.0604
## 6 TLT 0.316 -0.0511 -0.286
## 7 IYR 0.883 0.0433 0.0461
## 8 GLD 0.250 -0.288 0.0365
Sigma_f <- cov(window_data %>% select(`Mkt-RF`, SMB, HML))
B <- as.matrix(betas[, -1])
Sigma_ff <- B %*% Sigma_f %*% t(B)
# Regularization (prevents singular matrix)
Sigma_ff <- Sigma_ff + diag(1e-6, nrow(Sigma_ff))
one <- rep(1, ncol(Sigma_ff))
w_mvp_ff <- solve(Sigma_ff) %*% one / as.numeric(t(one) %*% solve(Sigma_ff) %*% one)
w_mvp_ff
## [,1]
## [1,] -0.2301041
## [2,] -0.3050892
## [3,] 0.2070706
## [4,] 0.1900895
## [5,] -0.1103467
## [6,] 0.7426309
## [7,] -0.1263138
## [8,] 0.6320629
march <- data_all %>% filter(Date == as.Date("2025-03-31"))
r_march <- as.numeric(march[, tickers])
ret_capm <- sum(w_mvp * r_march)
ret_ff <- sum(w_mvp_ff * r_march)
ret_capm
## [1] 0.005992885
ret_ff
## [1] 0.07879768
window_april <- data_all %>%
filter(Date >= as.Date("2020-04-30") & Date <= as.Date("2025-03-31"))
R2 <- as.matrix(window_april[, tickers])
Sigma2 <- cov(R2)
one <- rep(1, ncol(R2))
w_mvp2 <- solve(Sigma2) %*% one / as.numeric(t(one) %*% solve(Sigma2) %*% one)
april <- data_all %>% filter(Date == as.Date("2025-04-30"))
r_april <- as.numeric(april[, tickers])
ret_april <- sum(w_mvp2 * r_april)
ret_april
## [1] -0.001650694
# Skip metadata rows (French files usually need this)
data_raw <- read_csv("6_Portfolios_2x3.csv", skip = 11)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 8899 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): The portfolios include utilities and include financials.
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(data_raw)
## # A tibble: 6 × 1
## `The portfolios include utilities and include financials.`
## <chr>
## 1 Average Value Weighted Returns -- Monthly
## 2 ,SMALL LoBM,ME1 BM2,SMALL HiBM,BIG LoBM,ME2 BM2,BIG HiBM
## 3 192607, 1.0866, 0.8807, -0.1275, 5.5746, 1.9060, 2.0068
## 4 192608, 0.7831, 1.4677, 5.4422, 2.7268, 2.7028, 5.6834
## 5 192609, -2.8045, -0.0599, -0.4399, 1.4777, 0.0954, -0.7872
## 6 192610, -4.0289, -4.3615, -2.0128, -3.6327, -2.3451, -4.0040
# Rename first column
colnames(data_raw)[1] <- "Date"
# Remove footer rows (non-numeric dates)
data <- data_raw %>%
filter(!is.na(as.numeric(Date)))
## Warning: There was 1 warning in `filter()`.
## ℹ In argument: `!is.na(as.numeric(Date))`.
## Caused by warning:
## ! NAs introduced by coercion
# Convert Date
data$Date <- as.Date(paste0(data$Date, "01"), format="%Y%m%d")
# Convert everything else safely
data[,-1] <- data[,-1] %>%
mutate(across(everything(), ~as.numeric(.)))
# Drop NA
data <- na.omit(data)
str(data)
## tibble [0 × 1] (S3: tbl_df/tbl/data.frame)
## $ Date: 'Date' num(0)
midpoint <- floor(nrow(data) / 2)
data_1 <- data[1:midpoint, ]
data_2 <- data[(midpoint + 1):nrow(data), ]
compute_stats <- function(df) {
df %>%
summarise(across(-Date, list(
mean = ~mean(.),
sd = ~sd(.),
skew = ~moments::skewness(.),
kurt = ~moments::kurtosis(.)
), .names = "{col}_{fn}"))
}
stats_1 <- compute_stats(data_1)
stats_2 <- compute_stats(data_2)
stats_1
## # A tibble: 1 × 0
stats_2
## # A tibble: 1 × 0
comparison <- bind_rows(
Period1 = stats_1,
Period2 = stats_2,
.id = "Period"
)
comparison
## # A tibble: 2 × 1
## Period
## <chr>
## 1 Period1
## 2 Period2
# Make sure there are columns to pivot
if(ncol(data) > 1){
data_long <- data %>%
pivot_longer(cols = -Date,
names_to = "Portfolio",
values_to = "Return")
ggplot(data_long, aes(x = Return, fill = Portfolio)) +
geom_histogram(bins = 50, alpha = 0.6) +
facet_wrap(~Portfolio, scales = "free") +
theme_minimal()
} else {
print("Data format issue: no columns available for plotting.")
}
## [1] "Data format issue: no columns available for plotting."
The summary statistics differ across the two periods in mean, volatility, and higher moments. This suggests the return distributions are not stable over time, indicating the two subsamples likely do not come from the same distribution.
Er_p <- 0.11
sigma_p <- 0.15
rf <- 0.05
target_return <- 0.08
y <- (target_return - rf) / (Er_p - rf)
sigma_c <- y * sigma_p
y2 <- 0.12 / sigma_p
y
## [1] 0.5
sigma_c
## [1] 0.075
y2
## [1] 0.8
Er_m <- 0.12
sigma_m <- 0.20
rf <- 0.05
y <- 0.10 / sigma_m
Er_c <- rf + y * (Er_m - rf)
y
## [1] 0.5
Er_c
## [1] 0.085
risk_premium <- 0.10
rf <- 0.06
sigma_p <- 0.14
w_risky <- 0.6
w_rf <- 0.4
Er_p <- rf + risk_premium
Er_c <- w_risky * Er_p + w_rf * rf
sigma_c <- w_risky * sigma_p
Er_p
## [1] 0.16
Er_c
## [1] 0.12
sigma_c
## [1] 0.084
Er_A <- 0.10
sigma_A <- 0.05
Er_B <- 0.15
sigma_B <- 0.10
w_A <- sigma_B / (sigma_A + sigma_B)
w_B <- 1 - w_A
rf <- w_A * Er_A + w_B * Er_B
w_A
## [1] 0.6666667
w_B
## [1] 0.3333333
rf
## [1] 0.1166667
w_old <- 0.9
w_abc <- 0.1
Er_old <- 0.0067
Er_abc <- 0.0125
sigma_old <- 0.0237
sigma_abc <- 0.0295
rho <- 0.4
Er_new <- w_old * Er_old + w_abc * Er_abc
cov_ab <- rho * sigma_old * sigma_abc
var_new <- (w_old^2 * sigma_old^2) +
(w_abc^2 * sigma_abc^2) +
(2 * w_old * w_abc * cov_ab)
sigma_new <- sqrt(var_new)
Er_new
## [1] 0.00728
sigma_new
## [1] 0.02267179
rf <- 0.0042
Er_tbill <- w_old * Er_old + w_abc * rf
sigma_tbill <- w_old * sigma_old
Er_tbill
## [1] 0.00645
sigma_tbill
## [1] 0.02133
stocks <- data.frame(
name = c("A","B","C"),
Eri = c(0.12, 0.10, 0.14),
beta = c(1.2, 0.8, 1.5),
residual_var = c(0.04, 0.03, 0.05)
)
rf <- 0.05
Er_m <- 0.12
stocks$alpha <- stocks$Eri - (rf + stocks$beta * (Er_m - rf))
stocks$alpha_ratio <- stocks$alpha / stocks$residual_var
stocks <- stocks[order(-stocks$alpha_ratio), ]
stocks
## name Eri beta residual_var alpha alpha_ratio
## 2 B 0.10 0.8 0.03 -0.006 -0.20
## 3 C 0.14 1.5 0.05 -0.015 -0.30
## 1 A 0.12 1.2 0.04 -0.014 -0.35