Fama-MacBeth (1973) regression is a two-pass method widely used in empirical asset pricing to estimate risk premia. It involves:
# Install if needed:
# install.packages(c("tidyverse", "broom", "kableExtra", "ggplot2"))
library(tidyverse)
library(broom)
library(kableExtra)
library(ggplot2)We simulate a panel dataset of 30 stocks over 60 months with:
set.seed(42)
n_stocks <- 30
n_months <- 60
stock_ids <- paste0("Stock_", str_pad(1:n_stocks, 2, pad = "0"))
months <- seq(as.Date("2019-01-01"), by = "month", length.out = n_months)
# True betas for each stock (randomly assigned)
true_beta_mkt <- runif(n_stocks, 0.5, 1.8)
true_beta_smb <- runif(n_stocks, -0.5, 1.2)
true_beta_hml <- runif(n_stocks, -0.3, 1.0)
# Factor returns (monthly)
Mkt_RF <- rnorm(n_months, mean = 0.005, sd = 0.04)
SMB <- rnorm(n_months, mean = 0.002, sd = 0.02)
HML <- rnorm(n_months, mean = 0.003, sd = 0.02)
factors <- tibble(Date = months, Mkt_RF, SMB, HML)
# Build panel
panel <- map_dfr(1:n_stocks, function(i) {
eps <- rnorm(n_months, 0, 0.03)
Ret <- 0.001 +
true_beta_mkt[i] * Mkt_RF +
true_beta_smb[i] * SMB +
true_beta_hml[i] * HML +
eps
tibble(
Date = months,
Stock = stock_ids[i],
Ret = Ret,
Mkt_RF = Mkt_RF,
SMB = SMB,
HML = HML
)
})
head(panel, 10) %>%
kable(caption = "Sample Panel Data (first 10 rows)", digits = 4) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))| Date | Stock | Ret | Mkt_RF | SMB | HML |
|---|---|---|---|---|---|
| 2019-01-01 | Stock_01 | 0.0549 | 0.0223 | 0.0041 | 0.0316 |
| 2019-02-01 | Stock_01 | -0.0445 | -0.0275 | -0.0064 | -0.0169 |
| 2019-03-01 | Stock_01 | 0.1066 | 0.0628 | -0.0004 | 0.0121 |
| 2019-04-01 | Stock_01 | -0.0324 | -0.0123 | 0.0058 | 0.0047 |
| 2019-05-01 | Stock_01 | 0.1067 | 0.0312 | 0.0044 | 0.0209 |
| 2019-06-01 | Stock_01 | 0.0233 | 0.0179 | 0.0015 | -0.0016 |
| 2019-07-01 | Stock_01 | -0.0005 | -0.0264 | 0.0042 | 0.0197 |
| 2019-08-01 | Stock_01 | 0.0556 | 0.0680 | -0.0077 | -0.0319 |
| 2019-09-01 | Stock_01 | 0.0541 | 0.0307 | -0.0081 | 0.0368 |
| 2019-10-01 | Stock_01 | -0.0044 | 0.0086 | -0.0312 | 0.0203 |
For each stock \(i\), estimate:
\[R_{i,t} = \alpha_i + \beta_{i,Mkt} \cdot MktRF_t + \beta_{i,SMB} \cdot SMB_t + \beta_{i,HML} \cdot HML_t + \varepsilon_{i,t}\]
ts_betas <- panel %>%
group_by(Stock) %>%
do(tidy(lm(Ret ~ Mkt_RF + SMB + HML, data = .))) %>%
ungroup() %>%
filter(term != "(Intercept)") %>%
select(Stock, term, estimate) %>%
pivot_wider(names_from = term, values_from = estimate) %>%
rename(
beta_mkt = Mkt_RF,
beta_smb = SMB,
beta_hml = HML
)
ts_betas %>%
kable(caption = "Step 1: Estimated Betas from Time-Series Regressions",
digits = 4) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))| Stock | beta_mkt | beta_smb | beta_hml |
|---|---|---|---|
| Stock_01 | 1.7447 | 0.6593 | 0.2079 |
| Stock_02 | 1.6424 | 0.7349 | 0.9703 |
| Stock_03 | 0.8398 | 0.5639 | 0.5662 |
| Stock_04 | 1.4259 | 0.5597 | 0.3883 |
| Stock_05 | 1.2341 | -0.5386 | 0.5626 |
| Stock_06 | 1.1205 | 0.6327 | 0.0097 |
| Stock_07 | 1.7232 | -0.3079 | 0.1718 |
| Stock_08 | 0.8429 | 0.0192 | 0.8399 |
| Stock_09 | 1.3775 | 0.9612 | 0.5917 |
| Stock_10 | 1.4406 | 0.7269 | -0.1799 |
| Stock_11 | 0.9668 | -0.0182 | -0.3153 |
| Stock_12 | 1.2504 | 0.4373 | 0.0757 |
| Stock_13 | 1.7334 | -0.0387 | -0.1632 |
| Stock_14 | 0.7895 | 0.7222 | 0.0749 |
| Stock_15 | 1.0288 | 0.2734 | -0.1800 |
| Stock_16 | 1.7967 | 1.4308 | 0.7670 |
| Stock_17 | 1.8122 | 0.5999 | -0.2620 |
| Stock_18 | 0.7380 | 0.4687 | 0.1242 |
| Stock_19 | 0.9294 | 1.0571 | 0.0605 |
| Stock_20 | 1.1569 | 0.8242 | -0.2607 |
| Stock_21 | 1.4336 | 0.1210 | 0.3629 |
| Stock_22 | 0.6927 | -0.2357 | 0.0573 |
| Stock_23 | 1.9667 | 0.0954 | 0.2307 |
| Stock_24 | 1.7069 | 0.7653 | 0.5022 |
| Stock_25 | 0.7308 | -0.0650 | 0.5753 |
| Stock_26 | 0.9496 | 0.5888 | 0.0857 |
| Stock_27 | 1.0941 | 0.5471 | -0.1466 |
| Stock_28 | 1.7588 | -0.0105 | 0.0124 |
| Stock_29 | 1.1908 | -0.3107 | -0.1833 |
| Stock_30 | 1.5925 | 0.5303 | 0.0903 |
ts_betas_long <- ts_betas %>%
pivot_longer(cols = starts_with("beta_"), names_to = "Factor", values_to = "Beta") %>%
mutate(Factor = recode(Factor,
"beta_mkt" = "Market Beta",
"beta_smb" = "SMB Beta",
"beta_hml" = "HML Beta"
))
ggplot(ts_betas_long, aes(x = Beta, fill = Factor)) +
geom_histogram(bins = 12, color = "white", alpha = 0.85) +
facet_wrap(~Factor, scales = "free") +
scale_fill_manual(values = c("#2196F3", "#4CAF50", "#FF5722")) +
labs(
title = "Distribution of Estimated Betas Across Stocks",
subtitle = "Step 1: Time-Series Regression Results",
x = "Beta Estimate", y = "Count"
) +
theme_minimal(base_size = 13) +
theme(legend.position = "none",
plot.title = element_text(face = "bold"))Merge betas back into the panel, then for each time period \(t\), run:
\[R_{i,t} = \lambda_{0,t} + \lambda_{Mkt,t} \cdot \hat{\beta}_{i,Mkt} + \lambda_{SMB,t} \cdot \hat{\beta}_{i,SMB} + \lambda_{HML,t} \cdot \hat{\beta}_{i,HML} + \alpha_{i,t}\]
# Merge betas with panel returns
panel_with_betas <- panel %>%
left_join(ts_betas, by = "Stock")
# Cross-sectional regression for each month
cs_lambdas <- panel_with_betas %>%
group_by(Date) %>%
do(tidy(lm(Ret ~ beta_mkt + beta_smb + beta_hml, data = .))) %>%
ungroup() %>%
rename(
lambda = estimate,
se_lambda = std.error
)
# Show first few months
cs_lambdas %>%
filter(Date <= as.Date("2019-06-01")) %>%
select(Date, term, lambda, se_lambda, p.value) %>%
kable(caption = "Step 2: Cross-Sectional Lambda Estimates (first 6 months)",
digits = 5) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))| Date | term | lambda | se_lambda | p.value |
|---|---|---|---|---|
| 2019-01-01 | (Intercept) | 0.02076 | 0.01996 | 0.30789 |
| 2019-01-01 | beta_mkt | 0.00986 | 0.01479 | 0.51088 |
| 2019-01-01 | beta_smb | 0.00195 | 0.01265 | 0.87860 |
| 2019-01-01 | beta_hml | 0.02888 | 0.01642 | 0.09045 |
| 2019-02-01 | (Intercept) | -0.02264 | 0.02145 | 0.30101 |
| 2019-02-01 | beta_mkt | -0.01391 | 0.01590 | 0.38980 |
| 2019-02-01 | beta_smb | -0.00894 | 0.01359 | 0.51641 |
| 2019-02-01 | beta_hml | 0.00114 | 0.01765 | 0.94916 |
| 2019-03-01 | (Intercept) | -0.00249 | 0.02312 | 0.91490 |
| 2019-03-01 | beta_mkt | 0.05821 | 0.01714 | 0.00220 |
| 2019-03-01 | beta_smb | -0.00993 | 0.01465 | 0.50385 |
| 2019-03-01 | beta_hml | 0.04551 | 0.01902 | 0.02426 |
| 2019-04-01 | (Intercept) | 0.00711 | 0.02259 | 0.75544 |
| 2019-04-01 | beta_mkt | -0.01968 | 0.01674 | 0.25053 |
| 2019-04-01 | beta_smb | 0.02024 | 0.01431 | 0.16914 |
| 2019-04-01 | beta_hml | -0.02143 | 0.01859 | 0.25946 |
| 2019-05-01 | (Intercept) | 0.00690 | 0.02228 | 0.75918 |
| 2019-05-01 | beta_mkt | 0.01672 | 0.01651 | 0.32069 |
| 2019-05-01 | beta_smb | 0.00997 | 0.01411 | 0.48607 |
| 2019-05-01 | beta_hml | 0.04227 | 0.01833 | 0.02932 |
| 2019-06-01 | (Intercept) | 0.01871 | 0.02033 | 0.36585 |
| 2019-06-01 | beta_mkt | 0.00941 | 0.01507 | 0.53779 |
| 2019-06-01 | beta_smb | -0.00368 | 0.01288 | 0.77727 |
| 2019-06-01 | beta_hml | -0.01558 | 0.01673 | 0.36036 |
cs_lambdas %>%
filter(term != "(Intercept)") %>%
mutate(term = recode(term,
"beta_mkt" = "λ Market",
"beta_smb" = "λ SMB",
"beta_hml" = "λ HML"
)) %>%
ggplot(aes(x = Date, y = lambda, color = term)) +
geom_line(size = 0.8) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
facet_wrap(~term, ncol = 1, scales = "free_y") +
scale_color_manual(values = c("#2196F3", "#4CAF50", "#FF5722")) +
labs(
title = "Step 2: Time Series of Cross-Sectional Lambda Estimates",
x = "Date", y = "Lambda (Risk Premium)"
) +
theme_minimal(base_size = 12) +
theme(legend.position = "none",
plot.title = element_text(face = "bold"))Average the \(T\) cross-sectional lambdas. The Fama-MacBeth standard error is:
\[SE_{FM}(\hat{\lambda}) = \frac{s(\hat{\lambda}_t)}{\sqrt{T}}\]
where \(s(\hat{\lambda}_t)\) is the time-series standard deviation of the cross-sectional estimates.
fm_results <- cs_lambdas %>%
group_by(term) %>%
summarise(
FM_Estimate = mean(lambda),
FM_Std_Dev = sd(lambda),
T = n(),
FM_SE = sd(lambda) / sqrt(n()),
FM_t_stat = mean(lambda) / (sd(lambda) / sqrt(n())),
FM_p_value = 2 * pt(-abs(mean(lambda) / (sd(lambda) / sqrt(n()))), df = n() - 1)
) %>%
mutate(
Significance = case_when(
FM_p_value < 0.01 ~ "***",
FM_p_value < 0.05 ~ "**",
FM_p_value < 0.10 ~ "*",
TRUE ~ ""
)
)
fm_results %>%
kable(caption = "Step 3: Fama-MacBeth Final Results",
digits = 5,
col.names = c("Term", "FM Estimate", "Std Dev", "T", "FM Std Error",
"t-Statistic", "p-Value", "Sig.")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "bordered")) %>%
row_spec(which(fm_results$FM_p_value < 0.05), bold = TRUE, color = "white",
background = "#2196F3")| Term | FM Estimate | Std Dev | T | FM Std Error | t-Statistic | p-Value | Sig. |
|---|---|---|---|---|---|---|---|
| (Intercept) | -0.00207 | 0.01567 | 60 | 0.00202 | -1.02444 | 0.30981 | |
| beta_hml | 0.00365 | 0.02550 | 60 | 0.00329 | 1.10966 | 0.27165 | |
| beta_mkt | 0.01305 | 0.03959 | 60 | 0.00511 | 2.55250 | 0.01330 | ** |
| beta_smb | -0.00289 | 0.02035 | 60 | 0.00263 | -1.10051 | 0.27558 |
fm_results %>%
filter(term != "(Intercept)") %>%
mutate(
term = recode(term,
"beta_mkt" = "Market (MKT-RF)",
"beta_smb" = "Size (SMB)",
"beta_hml" = "Value (HML)"
),
Result = paste0(
round(FM_Estimate, 5), " ", Significance,
"\n(", round(FM_SE, 5), ")"
)
) %>%
select(Factor = term, `Risk Premium` = FM_Estimate,
`FM Std Error` = FM_SE, `t-Statistic` = FM_t_stat,
`p-Value` = FM_p_value, Sig. = Significance) %>%
kable(caption = "Fama-MacBeth Risk Premium Estimates (3-Factor Model)",
digits = 5) %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE) %>%
add_header_above(c(" " = 1, "Fama-MacBeth (1973)" = 5))| Factor | Risk Premium | FM Std Error | t-Statistic | p-Value | Sig. |
|---|---|---|---|---|---|
| Value (HML) | 0.00365 | 0.00329 | 1.10966 | 0.27165 | |
| Market (MKT-RF) | 0.01305 | 0.00511 | 2.55250 | 0.01330 | ** |
| Size (SMB) | -0.00289 | 0.00263 | -1.10051 | 0.27558 |
*** p<0.01, **
p<0.05, * p<0.10Note: In real applications, you would use actual stock return data (e.g., from CRSP) and Fama-French factor data (available from Kenneth French’s data library).
Replicated from: The Data Hall — Fama-MacBeth Regression in
R
Reference: Fama, E. F., & MacBeth, J. D. (1973). Risk, return,
and equilibrium: Empirical tests. Journal of Political Economy, 81(3),
607–636.