The Fama-MacBeth (1973) two-pass regression procedure is one of the most widely used methods in empirical asset pricing. It tests whether risk factors — estimated from time-series regressions — can explain the cross-section of expected returns.
“The main advantage of the Fama-MacBeth procedure is that it provides standard errors corrected for cross-sectional correlation of returns.”
| Feature | OLS Panel | Fama-MacBeth |
|---|---|---|
| Handles cross-sectional correlation | ✗ | ✓ |
| Time-varying risk premia | ✗ | ✓ |
| Standard practice in finance | Partial | ✓ |
| Works well when N > T | ✗ | ✓ |
The model we test is:
\[r_{i,t} - r_{f,t} = \alpha_i + \beta_{i,MKT} \cdot MKT_t + \beta_{i,SMB} \cdot SMB_t + \beta_{i,HML} \cdot HML_t + \varepsilon_{i,t}\]
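Where:
- \(r_{i,t}\) is the return on stock \(i\) on day \(t\) and \(r_{f,t}\) is the risk-free rate;
- \(MKT_t\), \(SMB_t\) and \(HML_t\) are the day-\(t\) returns on the market, size (small-minus-big) and value (high-minus-low) factors;
- \(\beta_{i,MKT}\), \(\beta_{i,SMB}\) and \(\beta_{i,HML}\) are stock \(i\)'s loadings on those factors;
- \(\alpha_i\) is the intercept and \(\varepsilon_{i,t}\) is the residual.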
# Load the panel of stock returns and factor returns from the working directory.
data <- read.csv("data.csv")

# Preview: render the first ten rows as a compact HTML table (6 decimal places).
preview_rows <- head(data, 10)
kable_styling(
  kbl(preview_rows, caption = "First 10 Rows of Dataset", digits = 6),
  bootstrap_options = c("striped", "hover", "condensed"),
  full_width = FALSE
)
| symbol | date | ri | MKT | SMB | HML |
|---|---|---|---|---|---|
| AAPL | 4-Jan-11 | 0.005206 | -0.001314 | -0.0065 | 0.0008 |
| AAPL | 5-Jan-11 | 0.008146 | 0.004995 | 0.0018 | 0.0013 |
| AAPL | 6-Jan-11 | -0.000808 | -0.002125 | 0.0001 | -0.0025 |
| AAPL | 7-Jan-11 | 0.007136 | -0.001847 | 0.0022 | -0.0006 |
| AAPL | 10-Jan-11 | 0.018657 | -0.001377 | 0.0041 | 0.0039 |
| AAPL | 11-Jan-11 | -0.002368 | 0.003718 | 0.0016 | 0.0036 |
| AAPL | 12-Jan-11 | 0.008104 | 0.008967 | 0.0031 | 0.0000 |
| AAPL | 13-Jan-11 | 0.003652 | -0.001712 | -0.0026 | -0.0044 |
| AAPL | 14-Jan-11 | 0.008067 | 0.007357 | -0.0010 | -0.0073 |
| AAPL | 18-Jan-11 | -0.022725 | 0.001375 | 0.0056 | 0.0015 |
## Dataset Dimensions: 7542 rows x 6 columns
## Stocks: AAPL, FORD, GE, GM, IBM, MSFT
## Date Range: 4-Jan-11 to 31-Dec-15
## Observations per stock: 1257
# summary() of the return column and the three factor columns, shown as a table.
numeric_summary <- summary(select(data, ri, MKT, SMB, HML))

numeric_summary %>%
  kbl(caption = "Summary Statistics") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  )
| ri | MKT | SMB | HML | |
|---|---|---|---|---|
| Min. :-0.3908663 | Min. :-0.0689583 | Min. :-1.660e-02 | Min. :-0.01490 | |
| 1st Qu.:-0.0087263 | 1st Qu.:-0.0040125 | 1st Qu.:-3.100e-03 | 1st Qu.:-0.00260 | |
| Median : 0.0000000 | Median : 0.0005438 | Median : 1.000e-04 | Median : 0.00000 | |
| Mean : 0.0002109 | Mean : 0.0003774 | Mean : 2.227e-06 | Mean : 0.00013 | |
| 3rd Qu.: 0.0093507 | 3rd Qu.: 0.0052641 | 3rd Qu.: 3.100e-03 | 3rd Qu.: 0.00260 | |
| Max. : 0.9614112 | Max. : 0.0463174 | Max. : 2.490e-02 | Max. : 0.02250 |
# Per-stock descriptive statistics of the daily return column `ri`.
# Sharpe is mean(ri) / sd(ri) scaled by sqrt(252); no risk-free rate is
# subtracted in this calculation.
stock_stats <- data %>%
  group_by(symbol) %>%
  summarise(
    N = n(),
    Mean_ri = mean(ri),
    SD_ri = sd(ri),
    Min_ri = min(ri),
    Max_ri = max(ri),
    Sharpe = Mean_ri / SD_ri * sqrt(252)
  )

# Colour for the Sharpe column, taken from the unrounded values computed above.
# This replaces a second, identical group_by()/summarise() pass over `data`
# that used to live inside column_spec().
sharpe_colour <- ifelse(stock_stats$Sharpe > 0, "darkgreen", "red")

stock_stats %>%
  mutate(across(where(is.numeric), ~round(., 5))) %>%
  kbl(caption = "Per-Stock Return Statistics (Daily)") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  ) %>%
  # Column 7 is Sharpe: green when positive, red otherwise.
  column_spec(7, bold = TRUE, color = sharpe_colour)
| symbol | N | Mean_ri | SD_ri | Min_ri | Max_ri | Sharpe |
|---|---|---|---|---|---|---|
| AAPL | 1257 | 0.00070 | 0.01680 | -0.13188 | 0.08502 | 0.65828 |
| FORD | 1257 | -0.00058 | 0.05549 | -0.39087 | 0.96141 | -0.16679 |
| GE | 1257 | 0.00056 | 0.01345 | -0.06765 | 0.10260 | 0.66159 |
| GM | 1257 | -0.00001 | 0.01895 | -0.11544 | 0.09108 | -0.00664 |
| IBM | 1257 | -0.00006 | 0.01221 | -0.08642 | 0.05511 | -0.07155 |
| MSFT | 1257 | 0.00065 | 0.01479 | -0.12103 | 0.09941 | 0.70212 |
# Ridge plot: one density of daily returns per stock, with quantile lines.
ggplot(data, aes(x = ri, y = symbol, fill = symbol)) +
  geom_density_ridges(
    alpha = 0.7,
    scale = 1.2,
    quantile_lines = TRUE,
    quantiles = c(0.05, 0.5, 0.95)
  ) +
  scale_x_continuous(labels = percent_format()) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Return Distributions by Stock (with 5th, 50th, 95th Percentiles)",
    subtitle = "Fama-French Three-Factor Dataset | 2011–2015",
    x = "Daily Return",
    y = NULL,
    caption = "Vertical lines indicate 5%, 50%, 95% quantiles"
  ) +
  theme_few(base_size = 13) +
  theme(legend.position = "none")

# Get unique dates with factor values.
# date_num is the row index of each distinct (date, MKT, SMB, HML) row and is
# used as the x-axis below; it follows the row order of `data`.
factor_data <- mutate(
  distinct(data, date, MKT, SMB, HML),
  date_num = row_number()
)

# Long format: one row per (date, factor).
factor_long <- pivot_longer(
  factor_data,
  cols = c(MKT, SMB, HML),
  names_to = "Factor",
  values_to = "Return"
)

# Colours shared by the two factor plots in this section.
factor_palette <- c("MKT" = "#2196F3", "SMB" = "#4CAF50", "HML" = "#FF5722")

# Daily factor returns, one facet per factor.
ggplot(factor_long, aes(x = date_num, y = Return, color = Factor)) +
  geom_line(alpha = 0.7) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +
  facet_wrap(~Factor, ncol = 1, scales = "free_y") +
  scale_y_continuous(labels = percent_format()) +
  scale_color_manual(values = factor_palette) +
  labs(
    title = "Fama-French Factor Returns Over Time",
    subtitle = "Daily MKT, SMB, HML factors (2011–2015)",
    x = "Trading Day",
    y = "Factor Return",
    caption = "Source: Fama-French Data Library"
  ) +
  theme_few(base_size = 12) +
  theme(legend.position = "none")

# Compounded factor returns: cumprod(1 + r) - 1, stored as cum_MKT / cum_SMB /
# cum_HML, then reshaped to long format with the "cum_" prefix stripped.
cumulative_long <- factor_data %>%
  mutate(
    across(c(MKT, SMB, HML), ~cumprod(1 + .x) - 1, .names = "cum_{.col}")
  ) %>%
  pivot_longer(
    cols = starts_with("cum_"),
    names_to = "Factor",
    values_to = "Cum_Return"
  ) %>%
  mutate(Factor = str_remove(Factor, "cum_"))

ggplot(cumulative_long, aes(x = date_num, y = Cum_Return, color = Factor)) +
  geom_line(size = 1.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  scale_color_manual(values = factor_palette) +
  scale_y_continuous(labels = percent_format()) +
  labs(
    title = "Cumulative Factor Returns",
    subtitle = "Growth of $1 invested in each factor (2011–2015)",
    x = "Trading Day",
    y = "Cumulative Return",
    color = "Factor"
  ) +
  theme_few(base_size = 13)

# Correlations between ri and the factors, computed over all rows of `data`
# (all stocks together).
cor_matrix <- cor(select(data, ri, MKT, SMB, HML))

kbl(round(cor_matrix, 4), caption = "Correlation Matrix: Returns and Factors") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) %>%
  column_spec(1, bold = TRUE)
| ri | MKT | SMB | HML | |
|---|---|---|---|---|
| ri | 1.0000 | 0.3388 | -0.0096 | 0.0087 |
| MKT | 0.3388 | 1.0000 | -0.0272 | 0.0197 |
| SMB | -0.0096 | -0.0272 | 1.0000 | -0.1866 |
| HML | 0.0087 | 0.0197 | -0.1866 | 1.0000 |
# Long format: every stock-day return paired with each of the three factor
# returns, so the scatter can be faceted by factor.
returns_vs_factors <- pivot_longer(
  select(data, symbol, ri, MKT, SMB, HML),
  cols = c(MKT, SMB, HML),
  names_to = "Factor",
  values_to = "Factor_Return"
)

# Scatter of stock return against factor return with linear fits drawn in black.
ggplot(returns_vs_factors, aes(x = Factor_Return, y = ri, color = symbol)) +
  geom_point(alpha = 0.15, size = 0.8) +
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black") +
  facet_wrap(~Factor, scales = "free_x") +
  scale_y_continuous(labels = percent_format()) +
  scale_x_continuous(labels = percent_format()) +
  labs(
    title = "Stock Returns vs. Fama-French Factors",
    x = "Factor Return",
    y = "Stock Return (ri)",
    color = "Stock"
  ) +
  theme_few(base_size = 12)
For each stock \(i\), regress its daily returns on the three Fama-French factors:
\[r_{i,t} = \alpha_i + \beta_{i,MKT} \cdot MKT_t + \beta_{i,SMB} \cdot SMB_t + \beta_{i,HML} \cdot HML_t + \varepsilon_{i,t}\]
# Time-series pass: one OLS fit of ri on MKT + SMB + HML per stock.
# Returns the tidy coefficient table of that fit.
tidy_factor_fit <- function(stock_df) {
  tidy(lm(ri ~ MKT + SMB + HML, data = stock_df))
}

# One row per (symbol, term) holding the coefficient estimate.
loadings_long <- data %>%
  nest(data = c(date, ri, MKT, SMB, HML)) %>%
  mutate(estimates = map(data, tidy_factor_fit)) %>%
  unnest(estimates) %>%
  select(symbol, estimate, term)

# One row per stock; the slopes are kept as b_MKT, b_HML, b_SMB and the
# intercept column is dropped by the final select().
step0 <- loadings_long %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  select(symbol, b_MKT = MKT, b_HML = HML, b_SMB = SMB)

# Display the loadings rounded to four decimals, beta columns in bold.
step0 %>%
  mutate(across(where(is.numeric), ~round(., 4))) %>%
  kbl(caption = "Step 0: Estimated Factor Loadings (Betas) per Stock") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) %>%
  column_spec(2:4, bold = TRUE)
| symbol | b_MKT | b_HML | b_SMB |
|---|---|---|---|
| AAPL | 0.9000 | -0.0578 | 0.0685 |
| FORD | 0.5129 | 0.1380 | -0.2644 |
| GE | 1.0779 | 0.0902 | 0.0994 |
| GM | 1.2854 | -0.0222 | 0.0039 |
| IBM | 0.8169 | -0.0121 | 0.0336 |
| MSFT | 0.9656 | -0.0641 | 0.0582 |
# Fit the three-factor regression for one stock's rows.
fit_three_factor <- function(stock_df) {
  lm(ri ~ MKT + SMB + HML, data = stock_df)
}

# Model-fit diagnostics per stock (from broom::glance), rounded to 4 decimals.
fits_by_stock <- data %>%
  nest(data = c(date, ri, MKT, SMB, HML)) %>%
  mutate(
    fit = map(data, fit_three_factor),
    glance_out = map(fit, glance)
  )

r2_table <- fits_by_stock %>%
  unnest(glance_out) %>%
  select(symbol, r.squared, adj.r.squared, statistic, p.value, nobs) %>%
  mutate(across(where(is.numeric), ~round(., 4)))

# R-squared column is green above 0.3 and orange otherwise.
r2_colour <- ifelse(r2_table$r.squared > 0.3, "darkgreen", "darkorange")

r2_table %>%
  kbl(caption = "Time-Series Regression: Model Fit per Stock") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) %>%
  column_spec(2, color = r2_colour, bold = TRUE)
| symbol | r.squared | adj.r.squared | statistic | p.value | nobs |
|---|---|---|---|---|---|
| AAPL | 0.2729 | 0.2712 | 156.7577 | 0.0000 | 1257 |
| FORD | 0.0090 | 0.0067 | 3.8122 | 0.0098 | 1257 |
| GE | 0.6125 | 0.6116 | 660.1958 | 0.0000 | 1257 |
| GM | 0.4379 | 0.4366 | 325.4098 | 0.0000 | 1257 |
| IBM | 0.4255 | 0.4241 | 309.2936 | 0.0000 | 1257 |
| MSFT | 0.4054 | 0.4040 | 284.7811 | 0.0000 | 1257 |
# Long format of the step-0 loadings: one row per (stock, factor), with the
# "b_" prefix removed from the factor label.
beta_long <- step0 %>%
  pivot_longer(
    cols = starts_with("b_"),
    names_to = "Factor",
    values_to = "Beta"
  ) %>%
  mutate(Factor = str_remove(Factor, "b_"))

# Grouped bar chart of the estimated betas.
ggplot(beta_long, aes(x = symbol, y = Beta, fill = Factor)) +
  geom_col(position = "dodge", color = "white", width = 0.7) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Estimated Factor Betas by Stock",
    subtitle = "Step 0: Time-Series OLS estimates",
    x = "Stock",
    y = "Beta Coefficient",
    fill = "Factor"
  ) +
  theme_few(base_size = 13)
For each date \(t\), regress the cross-section of returns on the estimated betas:
\[r_{i,t} = \lambda_0 + \lambda_{MKT} \hat{\beta}_{i,MKT} + \lambda_{SMB} \hat{\beta}_{i,SMB} + \lambda_{HML} \hat{\beta}_{i,HML} + \alpha_{i,t}\]
# Attach each stock's estimated betas to every one of its daily rows.
step0_joined <- left_join(data, step0, by = "symbol")

# Cross-sectional fit for a single date: returns on the estimated betas.
tidy_cross_section <- function(day_df) {
  tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = day_df))
}

# Run the cross-sectional regressions. Nesting the per-stock columns leaves one
# row per remaining key (date and that date's factor values).
lambdas_long <- step0_joined %>%
  nest(data = c(symbol, ri, b_MKT, b_SMB, b_HML)) %>%
  mutate(estimates = map(data, tidy_cross_section)) %>%
  unnest(estimates) %>%
  select(date, estimate, term)

# One row per date with the three slope estimates; the intercept is dropped.
step1 <- lambdas_long %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  select(date, b_MKT, b_HML, b_SMB)

cat("Number of cross-sectional regressions run:", nrow(step1), "\n")
## Number of cross-sectional regressions run: 1257
# First ten per-date lambda estimates, rounded to six decimals for display.
step1_preview <- mutate(
  head(step1, 10),
  across(where(is.numeric), ~round(., 6))
)

step1_preview %>%
  kbl(caption = "Step 1: First 10 Cross-Sectional Lambda Estimates") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE
  )
| date | b_MKT | b_HML | b_SMB |
|---|---|---|---|
| 4-Jan-11 | 0.041629 | 0.057372 | -0.025520 |
| 5-Jan-11 | -0.011347 | 0.062847 | -0.158046 |
| 6-Jan-11 | 0.037301 | -0.173234 | 0.007029 |
| 7-Jan-11 | 0.012722 | -0.064226 | 0.032269 |
| 10-Jan-11 | -0.036631 | 0.058646 | 0.017123 |
| 11-Jan-11 | 0.004089 | 0.089858 | -0.095361 |
| 12-Jan-11 | -0.055365 | 0.043036 | -0.164496 |
| 13-Jan-11 | -0.019357 | 0.025630 | 0.001815 |
| 14-Jan-11 | -0.016486 | 0.039214 | 0.063259 |
| 18-Jan-11 | 0.010146 | -0.090027 | 0.052508 |
# Reshape per-date lambdas to long format: one row per (date, factor), with the
# "b_" prefix removed from the factor label. Other columns are carried along.
lambdas_to_long <- function(lambda_df) {
  lambda_df %>%
    pivot_longer(
      cols = c(b_MKT, b_SMB, b_HML),
      names_to = "Factor",
      values_to = "Lambda"
    ) %>%
    mutate(Factor = str_remove(Factor, "b_"))
}

# Histogram of the daily lambda estimates, one facet per factor.
ggplot(lambdas_to_long(step1), aes(x = Lambda, fill = Factor)) +
  geom_histogram(bins = 50, alpha = 0.7, color = "white") +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  facet_wrap(~Factor, scales = "free") +
  scale_x_continuous(labels = percent_format()) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    title = "Distribution of Cross-Sectional Lambda Estimates",
    subtitle = "One lambda per trading day (Step 1)",
    x = "Lambda (Risk Premium)",
    y = "Count"
  ) +
  theme_few(base_size = 13) +
  theme(legend.position = "none")

# Lambdas over time; t is the row index of step1, added before reshaping.
lambda_series <- lambdas_to_long(mutate(step1, t = row_number()))

ggplot(lambda_series, aes(x = t, y = Lambda, color = Factor)) +
  geom_line(alpha = 0.6) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray40") +
  geom_smooth(method = "loess", se = TRUE, alpha = 0.15, size = 1.2) +
  facet_wrap(~Factor, ncol = 1, scales = "free_y") +
  scale_color_manual(
    values = c("MKT" = "#2196F3", "SMB" = "#4CAF50", "HML" = "#FF5722")
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(
    title = "Time-Varying Risk Premia (Lambdas) — Step 1",
    subtitle = "Cross-sectional lambda estimates over time with LOESS trend",
    x = "Trading Day",
    y = "Lambda"
  ) +
  theme_few(base_size = 12) +
  theme(legend.position = "none")
The Fama-MacBeth estimate of the risk premium is the time-series average of the step-1 lambdas. We test whether each is significantly different from zero.
\[\hat{\lambda}_k = \frac{1}{T} \sum_{t=1}^{T} \hat{\lambda}_{k,t}, \quad t\text{-stat} = \frac{\hat{\lambda}_k}{SE(\hat{\lambda}_k)}\]
## === Fama-MacBeth Results ===
## --- MKT Factor ---
##
## One Sample t-test
##
## data: step1$b_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.002546371 0.001722208
## sample estimates:
## mean of x
## -0.0004120813
##
## --- SMB Factor ---
##
## One Sample t-test
##
## data: step1$b_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003711466 0.011076953
## sample estimates:
## mean of x
## 0.003682744
##
## --- HML Factor ---
##
## One Sample t-test
##
## data: step1$b_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.005541205 0.004607776
## sample estimates:
## mean of x
## -0.0004667146
# Fama-MacBeth summary table: one row per factor.
# Mean and standard deviation come from the per-date lambdas in `step1`; the
# standard error, t-statistic, p-value and confidence bounds are read from the
# test objects mkt_test, smb_test and hml_test.
# NOTE(review): those three objects are defined outside this excerpt,
# presumably by the one-sample t-tests whose output is printed above; confirm.
results <- tibble(
  Factor = c("MKT", "SMB", "HML"),
  Mean_Lambda = c(mean(step1$b_MKT), mean(step1$b_SMB), mean(step1$b_HML)),
  Std_Dev = c(sd(step1$b_MKT), sd(step1$b_SMB), sd(step1$b_HML)),
  Std_Error = c(mkt_test$stderr, smb_test$stderr, hml_test$stderr),
  T_Stat = c(mkt_test$statistic, smb_test$statistic, hml_test$statistic),
  P_Value = c(mkt_test$p.value, smb_test$p.value, hml_test$p.value),
  CI_Low = c(mkt_test$conf.int[1], smb_test$conf.int[1], hml_test$conf.int[1]),
  CI_High = c(mkt_test$conf.int[2], smb_test$conf.int[2], hml_test$conf.int[2]),
  # One vectorised ifelse() over the P_Value column defined just above, instead
  # of three separate scalar ifelse() calls.
  Significant = ifelse(P_Value < 0.05, "✓ Yes", "✗ No")
)

# Render the table: rows significant at 5% are bold on a green background,
# the remaining rows are greyed out.
results %>%
  mutate(across(where(is.numeric), ~round(., 5))) %>%
  kbl(
    caption = "Fama-MacBeth Final Results: Risk Premium Estimates",
    col.names = c("Factor", "Mean λ", "Std Dev", "Std Error",
                  "t-stat", "p-value", "CI Low", "CI High", "Sig. (5%)")
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = TRUE
  ) %>%
  row_spec(which(results$P_Value < 0.05), bold = TRUE,
           background = "#e8f5e9") %>%
  row_spec(which(results$P_Value >= 0.05), color = "gray40")
| Factor | Mean λ | Std Dev | Std Error | t-stat | p-value | CI Low | CI High | Sig. (5%) |
|---|---|---|---|---|---|---|---|---|
| MKT | -0.00041 | 0.03857 | 0.00109 | -0.37879 | 0.70491 | -0.00255 | 0.00172 | ✗ No |
| SMB | 0.00368 | 0.13363 | 0.00377 | 0.97712 | 0.32870 | -0.00371 | 0.01108 | ✗ No |
| HML | -0.00047 | 0.09171 | 0.00259 | -0.18044 | 0.85684 | -0.00554 | 0.00461 | ✗ No |
# Running mean of each lambda series: cumulative sum divided by the row index.
running_means <- step1 %>%
  mutate(
    t = row_number(),
    roll_MKT = cumsum(b_MKT) / t,
    roll_SMB = cumsum(b_SMB) / t,
    roll_HML = cumsum(b_HML) / t
  ) %>%
  pivot_longer(
    cols = starts_with("roll_"),
    names_to = "Factor",
    values_to = "Rolling_Mean"
  ) %>%
  mutate(Factor = str_remove(Factor, "roll_"))

# Convergence plot: one facet per factor.
ggplot(running_means, aes(x = t, y = Rolling_Mean, color = Factor)) +
  geom_line(size = 1) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  facet_wrap(~Factor, ncol = 1, scales = "free_y") +
  scale_color_manual(
    values = c("MKT" = "#2196F3", "SMB" = "#4CAF50", "HML" = "#FF5722")
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(
    title = "Cumulative Average Lambda (Convergence Plot)",
    subtitle = "Running mean of cross-sectional risk premia — does it stabilize?",
    x = "Trading Day",
    y = "Cumulative Mean Lambda"
  ) +
  theme_few(base_size = 12) +
  theme(legend.position = "none")

# Interpretation table: a verbal significance label from the p-value, plus the
# mean daily lambda multiplied by 252.
findings <- results %>%
  mutate(
    Annual_Lambda = round(Mean_Lambda * 252, 4),
    Interpretation = case_when(
      P_Value < 0.01 ~ "Highly significant risk premium",
      P_Value < 0.05 ~ "Significant risk premium at 5%",
      P_Value < 0.10 ~ "Marginally significant (10%)",
      TRUE ~ "Not statistically significant"
    )
  ) %>%
  select(Factor, Mean_Lambda, T_Stat, P_Value, Annual_Lambda, Interpretation)

findings %>%
  mutate(
    across(c(Mean_Lambda, T_Stat, P_Value, Annual_Lambda), ~round(., 5))
  ) %>%
  kbl(caption = "Final Interpretation Table") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = TRUE
  ) %>%
  column_spec(6, italic = TRUE)
| Factor | Mean_Lambda | T_Stat | P_Value | Annual_Lambda | Interpretation |
|---|---|---|---|---|---|
| MKT | -0.00041 | -0.37879 | 0.70491 | -0.1038 | Not statistically significant |
| SMB | 0.00368 | 0.97712 | 0.32870 | 0.9281 | Not statistically significant |
| HML | -0.00047 | -0.18044 | 0.85684 | -0.1176 | Not statistically significant |
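The Fama-MacBeth two-pass procedure applied to a cross-section of 6 U.S. stocks (2011–2015) yields the following conclusions. None of the three estimated risk premia is statistically different from zero, even at the 10% level. With only six stocks, each daily cross-sectional regression fits four coefficients to six observations, so the test has little power.
MKT (Market Beta): The estimated market risk premium \(\hat{\lambda}_{MKT}\) tests whether exposure to market risk is priced. A significant positive value would confirm the CAPM intuition. Here the mean daily estimate is −0.00041 (t = −0.38, p = 0.70), so market beta is not priced in this sample.
SMB (Size Factor): Tests whether small-cap exposure earns a return premium over the sample period. The mean daily estimate is 0.00368 (t = 0.98, p = 0.33), which is not significant.
HML (Value Factor): Tests whether value stocks (high book-to-market) outperform growth stocks. The mean daily estimate is −0.00047 (t = −0.18, p = 0.86), which is not significant.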
Analysis conducted in R using the broom and tidyverse packages. Data sourced from the Fama-French Data Library.