The Fama-MacBeth (1973) procedure estimates risk premia for systematic factors by correcting standard errors for cross-sectional correlation. It is the standard approach when we have more cross-sections than time periods.
Two-step procedure:
| Step | Action | Output |
|---|---|---|
| Step 0 | N time-series regressions (one per asset) | Factor betas \(\hat{\beta}_i\) |
| Step 1 | T cross-sectional regressions (one per day) | Daily premia \(\hat{\lambda}_t\) |
| Step 2 | Average premia + t-test vs. \(\mu = 0\) | Risk-premium inference |
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(ggplot2)
library(knitr)
theme_set(
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 13),
plot.subtitle = element_text(colour = "grey50", size = 11),
panel.grid.minor = element_blank(),
axis.title = element_text(size = 11),
legend.position = "bottom"
)
)data <- read.csv("data.csv")
# Rename tickers to full company names
data <- data %>%
mutate(company = recode(symbol,
"AAPL" = "Apple Inc.",
"FORD" = "Ford Motor Co.",
"GE" = "General Electric",
"GM" = "General Motors",
"IBM" = "IBM Corp.",
"MSFT" = "Microsoft Corp."
))
glimpse(data)## Rows: 7,542
## Columns: 7
## $ symbol <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL"…
## $ date <chr> "4-Jan-11", "5-Jan-11", "6-Jan-11", "7-Jan-11", "10-Jan-11", "…
## $ ri <dbl> 0.0052062641, 0.0081462879, -0.0008082435, 0.0071360567, 0.018…
## $ MKT <dbl> -0.0013138901, 0.0049946699, -0.0021252276, -0.0018465050, -0.…
## $ SMB <dbl> -0.0065, 0.0018, 0.0001, 0.0022, 0.0041, 0.0016, 0.0031, -0.00…
## $ HML <dbl> 0.0008, 0.0013, -0.0025, -0.0006, 0.0039, 0.0036, 0.0000, -0.0…
## $ company <chr> "Apple Inc.", "Apple Inc.", "Apple Inc.", "Apple Inc.", "Apple…
data %>%
group_by(company, symbol) %>%
summarise(
N = n(),
Mean_ri = round(mean(ri), 6),
SD_ri = round(sd(ri), 6),
Mean_MKT = round(mean(MKT), 6),
Mean_SMB = round(mean(SMB), 6),
Mean_HML = round(mean(HML), 6),
.groups = "drop"
) %>%
kable(caption = "Descriptive statistics by company")| company | symbol | N | Mean_ri | SD_ri | Mean_MKT | Mean_SMB | Mean_HML |
|---|---|---|---|---|---|---|---|
| Apple Inc. | AAPL | 1257 | 0.000697 | 0.016804 | 0.000377 | 2e-06 | 0.00013 |
| Ford Motor Co. | FORD | 1257 | -0.000583 | 0.055493 | 0.000377 | 2e-06 | 0.00013 |
| General Electric | GE | 1257 | 0.000561 | 0.013450 | 0.000377 | 2e-06 | 0.00013 |
| General Motors | GM | 1257 | -0.000008 | 0.018947 | 0.000377 | 2e-06 | 0.00013 |
| IBM Corp. | IBM | 1257 | -0.000055 | 0.012213 | 0.000377 | 2e-06 | 0.00013 |
| Microsoft Corp. | MSFT | 1257 | 0.000654 | 0.014791 | 0.000377 | 2e-06 | 0.00013 |
For each of the N = 6 companies, regress daily excess returns \(r_{i,t}\) on the three Fama-French factors:
\[r_{i,t} = \alpha_i + \beta_i^{MKT} \cdot MKT_t + \beta_i^{SMB} \cdot SMB_t + \beta_i^{HML} \cdot HML_t + \varepsilon_{i,t}\]
step0 <- data %>%
nest(data = c(date, ri, MKT, SMB, HML)) %>%
mutate(
model = map(data, ~ lm(ri ~ MKT + SMB + HML, data = .x)),
estimates = map(model, tidy),
glance_ = map(model, glance)
) %>%
unnest(estimates) %>%
select(symbol, company, estimate, term) %>%
pivot_wider(names_from = term, values_from = estimate) %>%
rename(
alpha = `(Intercept)`,
b_MKT = MKT,
b_SMB = SMB,
b_HML = HML
)
# Extract R-squared separately
r2_df <- data %>%
nest(data = c(date, ri, MKT, SMB, HML)) %>%
mutate(r2 = map_dbl(data, ~ summary(lm(ri ~ MKT + SMB + HML, data = .x))$r.squared)) %>%
select(symbol, r2)
step0 <- step0 %>% left_join(r2_df, by = "symbol")
step0 %>%
select(company, symbol, alpha, b_MKT, b_SMB, b_HML, r2) %>%
mutate(across(where(is.numeric), ~ round(.x, 4))) %>%
rename(
Company = company,
Ticker = symbol,
Alpha = alpha,
`β MKT` = b_MKT,
`β SMB` = b_SMB,
`β HML` = b_HML,
`R²` = r2
) %>%
kable(caption = "Step 0: Estimated factor betas per company")| Company | Ticker | Alpha | β MKT | β SMB | β HML | R² |
|---|---|---|---|---|---|---|
| Apple Inc. | AAPL | 4e-04 | 0.9000 | 0.0685 | -0.0578 | 0.2729 |
| Ford Motor Co. | FORD | -8e-04 | 0.5129 | -0.2644 | 0.1380 | 0.0090 |
| General Electric | GE | 1e-04 | 1.0779 | 0.0994 | 0.0902 | 0.6125 |
| General Motors | GM | -5e-04 | 1.2854 | 0.0039 | -0.0222 | 0.4379 |
| IBM Corp. | IBM | -4e-04 | 0.8169 | 0.0336 | -0.0121 | 0.4255 |
| Microsoft Corp. | MSFT | 3e-04 | 0.9656 | 0.0582 | -0.0641 | 0.4054 |
step0 %>%
select(company, b_MKT, b_SMB, b_HML) %>%
pivot_longer(cols = starts_with("b_"),
names_to = "factor",
values_to = "beta") %>%
mutate(
factor = recode(factor,
b_MKT = "β MKT", b_SMB = "β SMB", b_HML = "β HML"),
company = stringr::str_wrap(company, 12)
) %>%
ggplot(aes(x = company, y = beta, fill = factor)) +
geom_col(position = position_dodge(0.75), width = 0.65) +
geom_hline(yintercept = 0, linewidth = 0.4, colour = "grey40") +
scale_fill_manual(
values = c("β MKT" = "#185FA5", "β SMB" = "#0F6E56", "β HML" = "#993C1D"),
name = NULL
) +
labs(
title = "Factor betas by company",
subtitle = "Estimated from full-sample time-series regressions",
x = NULL, y = "Beta coefficient"
)Merge betas back onto the panel, then for each of the T = 1257 trading days run a cross-sectional regression of returns on betas:
\[r_{i,t} = \lambda_t^0 + \lambda_t^{MKT} \hat{\beta}_i^{MKT} + \lambda_t^{SMB} \hat{\beta}_i^{SMB} + \lambda_t^{HML} \hat{\beta}_i^{HML} + u_{i,t}\]
# Merge betas onto full panel
panel <- data %>%
left_join(step0 %>% select(symbol, b_MKT, b_SMB, b_HML), by = "symbol")
# Run cross-sectional regression for every date
step1 <- panel %>%
nest(data = c(symbol, company, ri, b_MKT, b_SMB, b_HML)) %>%
mutate(
estimates = map(data, ~ tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = .x)))
) %>%
unnest(estimates) %>%
select(date, estimate, term) %>%
pivot_wider(names_from = term, values_from = estimate) %>%
rename(
intercept = `(Intercept)`,
lambda_MKT = b_MKT,
lambda_SMB = b_SMB,
lambda_HML = b_HML
)
head(step1, 10) %>%
mutate(across(where(is.numeric), ~ round(.x, 6))) %>%
kable(caption = "Step 1: First 10 daily cross-sectional premia (λ̂_t)")| date | intercept | lambda_MKT | lambda_SMB | lambda_HML |
|---|---|---|---|---|
| 4-Jan-11 | -0.029761 | 0.041629 | -0.025520 | 0.057372 |
| 5-Jan-11 | 0.022334 | -0.011347 | -0.158046 | 0.062847 |
| 6-Jan-11 | -0.029243 | 0.037301 | 0.007029 | -0.173234 |
| 7-Jan-11 | -0.017503 | 0.012722 | 0.032269 | -0.064226 |
| 10-Jan-11 | 0.036414 | -0.036631 | 0.017123 | 0.058646 |
| 11-Jan-11 | 0.002744 | 0.004089 | -0.095361 | 0.089858 |
| 12-Jan-11 | 0.072331 | -0.055365 | -0.164496 | 0.043036 |
| 13-Jan-11 | 0.015027 | -0.019357 | 0.001815 | 0.025630 |
| 14-Jan-11 | 0.019775 | -0.016486 | 0.063259 | 0.039214 |
| 18-Jan-11 | -0.018911 | 0.010146 | 0.052508 | -0.090027 |
step1 %>%
select(date, lambda_MKT, lambda_SMB, lambda_HML) %>%
pivot_longer(cols = starts_with("lambda_"),
names_to = "factor",
values_to = "lambda") %>%
mutate(factor = recode(factor,
lambda_MKT = "λ MKT", lambda_SMB = "λ SMB", lambda_HML = "λ HML")) %>%
ggplot(aes(x = lambda, fill = factor, colour = factor)) +
geom_histogram(bins = 50, alpha = 0.55, position = "identity") +
geom_vline(xintercept = 0, linewidth = 0.5, colour = "grey30", linetype = "dashed") +
scale_fill_manual(
values = c("λ MKT" = "#185FA5", "λ SMB" = "#0F6E56", "λ HML" = "#993C1D"),
name = NULL
) +
scale_colour_manual(
values = c("λ MKT" = "#185FA5", "λ SMB" = "#0F6E56", "λ HML" = "#993C1D"),
name = NULL
) +
facet_wrap(~ factor, scales = "free_x") +
labs(
title = "Distribution of daily cross-sectional risk premia",
subtitle = "Each observation is one trading day's estimated λ̂_t",
x = "Daily premium (λ)", y = "Count"
) +
theme(legend.position = "none")Average each daily premium series and test \(H_0: \bar{\lambda} = 0\) via a one-sample
t-test. This is exactly the t.test(step1$b_MKT, mu = 0)
call from the original R script.
mkt_test <- t.test(step1$lambda_MKT, mu = 0)
smb_test <- t.test(step1$lambda_SMB, mu = 0)
hml_test <- t.test(step1$lambda_HML, mu = 0)
mkt_test##
## One Sample t-test
##
## data: step1$lambda_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.002546371 0.001722208
## sample estimates:
## mean of x
## -0.0004120813
##
## One Sample t-test
##
## data: step1$lambda_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003711466 0.011076953
## sample estimates:
## mean of x
## 0.003682744
##
## One Sample t-test
##
## data: step1$lambda_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.005541205 0.004607776
## sample estimates:
## mean of x
## -0.0004667146
fm_results <- tibble(
Factor = c("Market (λ MKT)", "Size (λ SMB)", "Value (λ HML)"),
`Mean λ` = c(mean(step1$lambda_MKT), mean(step1$lambda_SMB), mean(step1$lambda_HML)),
`Std Dev` = c(sd(step1$lambda_MKT), sd(step1$lambda_SMB), sd(step1$lambda_HML)),
`Std Error` = c(mkt_test$stderr, smb_test$stderr, hml_test$stderr),
`t-stat` = c(mkt_test$statistic, smb_test$statistic, hml_test$statistic),
`p-value` = c(mkt_test$p.value, smb_test$p.value, hml_test$p.value),
`CI lower` = c(mkt_test$conf.int[1], smb_test$conf.int[1], hml_test$conf.int[1]),
`CI upper` = c(mkt_test$conf.int[2], smb_test$conf.int[2], hml_test$conf.int[2]),
Significant = c(
ifelse(mkt_test$p.value < 0.05, "Yes ✓", "No"),
ifelse(smb_test$p.value < 0.05, "Yes ✓", "No"),
ifelse(hml_test$p.value < 0.05, "Yes ✓", "No")
)
) %>%
mutate(across(where(is.numeric), ~ round(.x, 6)))
kable(fm_results,
caption = "Fama-MacBeth results — one-sample t-tests vs. μ = 0 (95% confidence)")| Factor | Mean λ | Std Dev | Std Error | t-stat | p-value | CI lower | CI upper | Significant |
|---|---|---|---|---|---|---|---|---|
| Market (λ MKT) | -0.000412 | 0.038570 | 0.001088 | -0.378788 | 0.704909 | -0.002546 | 0.001722 | No |
| Size (λ SMB) | 0.003683 | 0.133626 | 0.003769 | 0.977117 | 0.328699 | -0.003711 | 0.011077 | No |
| Value (λ HML) | -0.000467 | 0.091705 | 0.002587 | -0.180437 | 0.856839 | -0.005541 | 0.004608 | No |
tibble(
factor = c("λ MKT", "λ SMB", "λ HML"),
mean = c(mean(step1$lambda_MKT), mean(step1$lambda_SMB), mean(step1$lambda_HML)),
ci_lo = c(mkt_test$conf.int[1], smb_test$conf.int[1], hml_test$conf.int[1]),
ci_hi = c(mkt_test$conf.int[2], smb_test$conf.int[2], hml_test$conf.int[2]),
sig = c(mkt_test$p.value < 0.05, smb_test$p.value < 0.05, hml_test$p.value < 0.05)
) %>%
ggplot(aes(x = factor, y = mean, colour = factor)) +
geom_hline(yintercept = 0, linewidth = 0.5, colour = "grey40", linetype = "dashed") +
geom_errorbar(aes(ymin = ci_lo, ymax = ci_hi), width = 0.15, linewidth = 1) +
geom_point(size = 4) +
scale_colour_manual(
values = c("λ MKT" = "#185FA5", "λ SMB" = "#0F6E56", "λ HML" = "#993C1D"),
guide = "none"
) +
labs(
title = "Fama-MacBeth risk premia with 95% confidence intervals",
subtitle = "Dashed line = 0 (null hypothesis); error bars = 95% CI from t-test",
x = NULL, y = "Average daily premium (λ̄)"
)## Market factor (λ MKT): mean = -0.000412, t = -0.3788, p = 0.7049 → Not significant at 5%
## Size factor (λ SMB): mean = 0.003683, t = 0.9771, p = 0.3287 → Not significant at 5%
## Value factor (λ HML): mean = -0.000467, t = -0.1804, p = 0.8568 → Not significant at 5%
Key takeaways:
Methodology: Fama, E.F. & MacBeth, J.D. (1973). Risk, return, and equilibrium: Empirical tests. Journal of Political Economy, 81(3), 607–636.