# Install packages (only run once — comment out after first run)
# install.packages(c("broom", "tidyverse"))
library(broom)
library(tidyverse)data <- read.csv("~/Downloads/data.csv", stringsAsFactors = FALSE)
# Parse dates — handles formats like "4-Jan-11" and "2011-01-04"
data$date <- as.Date(data$date, tryFormats = c("%d-%b-%y", "%Y-%m-%d", "%m/%d/%Y"))
# Quick preview
glimpse(data)## Rows: 7,542
## Columns: 6
## $ symbol <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL",…
## $ date <date> 2011-01-04, 2011-01-05, 2011-01-06, 2011-01-07, 2011-01-10, 20…
## $ ri <dbl> 0.0052062641, 0.0081462879, -0.0008082435, 0.0071360567, 0.0186…
## $ MKT <dbl> -0.0013138901, 0.0049946699, -0.0021252276, -0.0018465050, -0.0…
## $ SMB <dbl> -0.0065, 0.0018, 0.0001, 0.0022, 0.0041, 0.0016, 0.0031, -0.002…
## $ HML <dbl> 0.0008, 0.0013, -0.0025, -0.0006, 0.0039, 0.0036, 0.0000, -0.00…
For each stock (symbol), estimate the Fama-French
three-factor model:
\[r_i = \alpha + \beta_{MKT} \cdot MKT + \beta_{SMB} \cdot SMB + \beta_{HML} \cdot HML + \varepsilon\]
betas <- data %>%
group_by(symbol) %>%
nest() %>%
mutate(
model = map(data, ~ lm(ri ~ MKT + SMB + HML, data = .x)),
tidy_model = map(model, tidy)
) %>%
unnest(tidy_model) %>%
# Drop the intercept row; keep only factor betas
filter(term != "(Intercept)") %>%
select(symbol, term, estimate) %>%
pivot_wider(names_from = term,
values_from = estimate) %>%
rename(
b_MKT = MKT,
b_SMB = SMB,
b_HML = HML
)
head(betas)## # A tibble: 6 × 4
## # Groups: symbol [6]
## symbol b_MKT b_SMB b_HML
## <chr> <dbl> <dbl> <dbl>
## 1 AAPL 0.900 0.0685 -0.0578
## 2 FORD 0.513 -0.264 0.138
## 3 GE 1.08 0.0994 0.0902
## 4 GM 1.29 0.00390 -0.0222
## 5 IBM 0.817 0.0336 -0.0121
## 6 MSFT 0.966 0.0582 -0.0641
# Merge estimated betas back into the original data
data_with_betas <- data %>%
left_join(betas, by = "symbol")For each date \(t\), regress realized returns on the pre-estimated betas:
\[r_{i,t} = \lambda_0 + \lambda_{MKT} \hat{\beta}_{MKT,i} + \lambda_{SMB} \hat{\beta}_{SMB,i} + \lambda_{HML} \hat{\beta}_{HML,i} + \varepsilon_{i,t}\]
risk_premiums <- data_with_betas %>%
group_by(date) %>%
nest() %>%
mutate(
model = map(data, ~ lm(ri ~ b_MKT + b_SMB + b_HML, data = .x)),
tidy_model = map(model, tidy)
) %>%
unnest(tidy_model) %>%
# Keep only the slope estimates (risk premiums); drop the intercept
filter(term != "(Intercept)") %>%
select(date, term, estimate) %>%
pivot_wider(names_from = term,
values_from = estimate) %>%
rename(
lambda_MKT = b_MKT,
lambda_SMB = b_SMB,
lambda_HML = b_HML
)
head(risk_premiums)## # A tibble: 6 × 4
## # Groups: date [6]
## date lambda_MKT lambda_SMB lambda_HML
## <date> <dbl> <dbl> <dbl>
## 1 2011-01-04 0.0416 -0.0255 0.0574
## 2 2011-01-05 -0.0113 -0.158 0.0628
## 3 2011-01-06 0.0373 0.00703 -0.173
## 4 2011-01-07 0.0127 0.0323 -0.0642
## 5 2011-01-10 -0.0366 0.0171 0.0586
## 6 2011-01-11 0.00409 -0.0954 0.0899
t_MKT <- t.test(risk_premiums$lambda_MKT, mu = 0)
t_SMB <- t.test(risk_premiums$lambda_SMB, mu = 0)
t_HML <- t.test(risk_premiums$lambda_HML, mu = 0)
print(t_MKT)##
## One Sample t-test
##
## data: risk_premiums$lambda_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.002546371 0.001722208
## sample estimates:
## mean of x
## -0.0004120813
##
## One Sample t-test
##
## data: risk_premiums$lambda_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003711466 0.011076953
## sample estimates:
## mean of x
## 0.003682744
##
## One Sample t-test
##
## data: risk_premiums$lambda_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.005541205 0.004607776
## sample estimates:
## mean of x
## -0.0004667146
summary_tbl <- tibble(
Factor = c("MKT", "SMB", "HML"),
Avg_Lambda = avg_premiums,
t_stat = c(t_MKT$statistic,
t_SMB$statistic,
t_HML$statistic),
p_value = c(t_MKT$p.value,
t_SMB$p.value,
t_HML$p.value),
Significant = ifelse(
c(t_MKT$p.value, t_SMB$p.value, t_HML$p.value) < 0.05,
"Yes", "No"
)
)
knitr::kable(
summary_tbl,
digits = 4,
caption = "Fama-MacBeth Risk Premium Estimates"
)| Factor | Avg_Lambda | t_stat | p_value | Significant |
|---|---|---|---|---|
| MKT | -0.0004 | -0.3788 | 0.7049 | No |
| SMB | 0.0037 | 0.9771 | 0.3287 | No |
| HML | -0.0005 | -0.1804 | 0.8568 | No |