The Fama-MacBeth (1973) procedure estimates factor risk premiums in three steps:
library(readr)
library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(sandwich)
library(lmtest)
library(ggplot2)
library(knitr)The dataset contains daily stock returns for 6 stocks (AAPL, FORD, GE, GM, IBM, MSFT) along with the Fama-French 3 factors (MKT, SMB, HML), covering the period 2011–2015.
data <- read_csv("data.csv")
data <- data %>%
mutate(date = as.Date(date, format = "%d-%b-%y"))
glimpse(data)## Rows: 7,542
## Columns: 6
## $ symbol <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL",…
## $ date <date> 2011-01-04, 2011-01-05, 2011-01-06, 2011-01-07, 2011-01-10, 20…
## $ ri <dbl> 0.0052062641, 0.0081462879, -0.0008082435, 0.0071360567, 0.0186…
## $ MKT <dbl> -0.0013138901, 0.0049946699, -0.0021252276, -0.0018465050, -0.0…
## $ SMB <dbl> -0.0065, 0.0018, 0.0001, 0.0022, 0.0041, 0.0016, 0.0031, -0.002…
## $ HML <dbl> 0.0008, 0.0013, -0.0025, -0.0006, 0.0039, 0.0036, 0.0000, -0.00…
summary_tbl <- data %>%
group_by(symbol) %>%
summarise(
Start = min(date),
End = max(date),
`# Days` = n(),
`Mean Return` = round(mean(ri, na.rm = TRUE), 5),
`SD Return` = round(sd(ri, na.rm = TRUE), 5)
)
kable(summary_tbl,
caption = "Dataset Summary by Stock")| symbol | Start | End | # Days | Mean Return | SD Return |
|---|---|---|---|---|---|
| AAPL | 2011-01-04 | 2015-12-31 | 1257 | 0.00070 | 0.01680 |
| FORD | 2011-01-04 | 2015-12-31 | 1257 | -0.00058 | 0.05549 |
| GE | 2011-01-04 | 2015-12-31 | 1257 | 0.00056 | 0.01345 |
| GM | 2011-01-04 | 2015-12-31 | 1257 | -0.00001 | 0.01895 |
| IBM | 2011-01-04 | 2015-12-31 | 1257 | -0.00006 | 0.01221 |
| MSFT | 2011-01-04 | 2015-12-31 | 1257 | 0.00065 | 0.01479 |
ggplot(data, aes(x = date, y = ri, colour = symbol)) +
geom_line(linewidth = 0.4, alpha = 0.8) +
facet_wrap(~symbol, ncol = 2, scales = "free_y") +
labs(title = "Daily Return Series by Stock (2011–2015)",
x = NULL, y = "Daily Return") +
theme_minimal(base_size = 11) +
theme(legend.position = "none")For each stock, regress daily returns on MKT, SMB, and HML over the full sample to obtain each stock’s factor exposures (betas).
stocks <- unique(data$symbol)
# Run one OLS per stock
ts_models <- map(stocks, function(stk) {
d <- data %>% filter(symbol == stk)
lm(ri ~ MKT + SMB + HML, data = d)
})
names(ts_models) <- stocks
# Extract coefficients
betas <- map_dfr(ts_models, function(m) {
tibble(
alpha = coef(m)[1],
beta_mkt = coef(m)[2],
beta_smb = coef(m)[3],
beta_hml = coef(m)[4]
)
}, .id = "Stock")
kable(betas,
col.names = c("Stock", "α (Alpha)", "β MKT", "β SMB", "β HML"),
digits = 4,
caption = "Step 1: Estimated Factor Loadings (Betas) per Stock")| Stock | α (Alpha) | β MKT | β SMB | β HML |
|---|---|---|---|---|
| AAPL | 4e-04 | 0.9000 | 0.0685 | -0.0578 |
| FORD | -8e-04 | 0.5129 | -0.2644 | 0.1380 |
| GE | 1e-04 | 1.0779 | 0.0994 | 0.0902 |
| GM | -5e-04 | 1.2854 | 0.0039 | -0.0222 |
| IBM | -4e-04 | 0.8169 | 0.0336 | -0.0121 |
| MSFT | 3e-04 | 0.9656 | 0.0582 | -0.0641 |
betas %>%
pivot_longer(c(beta_mkt, beta_smb, beta_hml),
names_to = "Factor", values_to = "Loading") %>%
mutate(Factor = recode(Factor,
beta_mkt = "β MKT",
beta_smb = "β SMB",
beta_hml = "β HML")) %>%
ggplot(aes(x = Stock, y = Loading, fill = Loading > 0)) +
geom_col(show.legend = FALSE) +
geom_hline(yintercept = 0, linetype = "dashed") +
facet_wrap(~Factor, scales = "free_y") +
scale_fill_manual(values = c("TRUE" = "#4575b4", "FALSE" = "#d73027")) +
labs(title = "Step 1: Factor Loadings per Stock",
x = NULL, y = "Beta") +
theme_minimal(base_size = 11)At each trading day t, run a cross-sectional OLS of stock returns on the betas from Step 1, yielding a daily time series of \(\lambda_t\) estimates.
# Merge betas into panel
panel <- data %>%
left_join(betas %>%
rename(symbol = Stock) %>%
select(symbol, beta_mkt, beta_smb, beta_hml),
by = "symbol")
# Cross-sectional regression at each date
lambdas <- panel %>%
group_by(date) %>%
do(tidy(lm(ri ~ beta_mkt + beta_smb + beta_hml, data = .))) %>%
ungroup() %>%
select(date, term, estimate) %>%
pivot_wider(names_from = term, values_from = estimate) %>%
rename(
lambda_0 = `(Intercept)`,
lambda_MKT = beta_mkt,
lambda_SMB = beta_smb,
lambda_HML = beta_hml
)
cat("Cross-sectional regressions run:", nrow(lambdas), "\n")## Cross-sectional regressions run: 1257
kable(head(lambdas, 8),
col.names = c("date", "lambda_0", "lambda_MKT", "lambda_SMB", "lambda_HML"),
digits = 5,
caption = "Sample of Daily λ Estimates (Second Pass)")| date | lambda_0 | lambda_MKT | lambda_SMB | lambda_HML |
|---|---|---|---|---|
| 2011-01-04 | -0.02976 | 0.04163 | -0.02552 | 0.05737 |
| 2011-01-05 | 0.02233 | -0.01135 | -0.15805 | 0.06285 |
| 2011-01-06 | -0.02924 | 0.03730 | 0.00703 | -0.17323 |
| 2011-01-07 | -0.01750 | 0.01272 | 0.03227 | -0.06423 |
| 2011-01-10 | 0.03641 | -0.03663 | 0.01712 | 0.05865 |
| 2011-01-11 | 0.00274 | 0.00409 | -0.09536 | 0.08986 |
| 2011-01-12 | 0.07233 | -0.05536 | -0.16450 | 0.04304 |
| 2011-01-13 | 0.01503 | -0.01936 | 0.00181 | 0.02563 |
The final estimates are time-series means of the \(\lambda_t\) series, with standard errors computed from their time-series variation:
\[t(\hat\lambda) = \frac{\bar\lambda}{SD(\lambda_t)/\sqrt{T}}\]
fm_test <- function(x, label) {
x <- na.omit(x)
n <- length(x)
mu <- mean(x)
se <- sd(x) / sqrt(n)
ts <- mu / se
pv <- 2 * pt(-abs(ts), df = n - 1)
tibble(
Factor = label,
`Mean λ` = mu,
`Std. Error` = se,
`t-Statistic` = ts,
`p-Value` = pv,
Sig. = ifelse(pv < 0.01, "***",
ifelse(pv < 0.05, "**",
ifelse(pv < 0.10, "*", "")))
)
}
fm_table <- bind_rows(
fm_test(lambdas$lambda_0, "Intercept (λ₀)"),
fm_test(lambdas$lambda_MKT, "Market Premium (λ_MKT)"),
fm_test(lambdas$lambda_SMB, "Size Premium (λ_SMB)"),
fm_test(lambdas$lambda_HML, "Value Premium (λ_HML)")
)
kable(fm_table, digits = 5,
caption = "Fama-MacBeth Risk Premia Estimates")| Factor | Mean λ | Std. Error | t-Statistic | p-Value | Sig. |
|---|---|---|---|---|---|
| Intercept (λ₀) | 0.00060 | 0.00109 | 0.54901 | 0.58309 | |
| Market Premium (λ_MKT) | -0.00041 | 0.00109 | -0.37879 | 0.70491 | |
| Size Premium (λ_SMB) | 0.00368 | 0.00377 | 0.97712 | 0.32870 | |
| Value Premium (λ_HML) | -0.00047 | 0.00259 | -0.18044 | 0.85684 |
## *** p<0.01 ** p<0.05 * p<0.10
lambdas %>%
pivot_longer(-date, names_to = "Factor", values_to = "lambda") %>%
mutate(Factor = recode(Factor,
lambda_0 = "Intercept (λ₀)",
lambda_MKT = "Market λ",
lambda_SMB = "SMB λ",
lambda_HML = "HML λ")) %>%
ggplot(aes(x = date, y = lambda)) +
geom_line(colour = "#2c7bb6", linewidth = 0.4, alpha = 0.8) +
geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
facet_wrap(~Factor, ncol = 1, scales = "free_y") +
labs(title = "Time Series of Cross-Sectional λ Estimates",
x = NULL, y = "Lambda") +
theme_minimal(base_size = 11)fm_table %>%
filter(Factor != "Intercept (λ₀)") %>%
mutate(
ci_lo = `Mean λ` - 1.96 * `Std. Error`,
ci_hi = `Mean λ` + 1.96 * `Std. Error`,
sig = ifelse(Sig. != "", "Significant", "Not Significant")
) %>%
ggplot(aes(x = Factor, y = `Mean λ`, fill = sig)) +
geom_col(width = 0.5) +
geom_errorbar(aes(ymin = ci_lo, ymax = ci_hi),
width = 0.15, linewidth = 0.8) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_fill_manual(values = c("Significant" = "#4575b4",
"Not Significant" = "#aec3d4")) +
labs(title = "Risk Premia with 95% Confidence Intervals",
x = NULL, y = "Mean λ", fill = "") +
theme_minimal(base_size = 11) +
theme(legend.position = "bottom")lambdas %>%
pivot_longer(-date, names_to = "Factor", values_to = "lambda") %>%
filter(Factor != "lambda_0") %>%
mutate(Factor = recode(Factor,
lambda_MKT = "Market λ",
lambda_SMB = "SMB λ",
lambda_HML = "HML λ")) %>%
ggplot(aes(x = lambda, fill = Factor)) +
geom_histogram(bins = 60, colour = "white", alpha = 0.85,
show.legend = FALSE) +
geom_vline(xintercept = 0, linetype = "dashed", colour = "grey30") +
facet_wrap(~Factor, scales = "free") +
labs(title = "Distribution of Cross-Sectional λ Estimates",
x = "Lambda", y = "Count") +
theme_minimal(base_size = 11)Standard FM SEs correct only for cross-sectional correlation. Newey-West (1987) SEs additionally correct for time-series autocorrelation in the \(\lambda_t\) series (6 lags).
nw_test <- function(x, label) {
x <- na.omit(x)
n <- length(x)
lag <- 6
m <- lm(x ~ 1)
se <- sqrt(NeweyWest(m, lag = lag, prewhite = FALSE)[1, 1])
mu <- mean(x)
ts <- mu / se
pv <- 2 * pt(-abs(ts), df = n - 1)
tibble(
Factor = label,
`Mean λ` = mu,
`NW Std. Error` = se,
`NW t-Stat` = ts,
`p-Value` = pv,
Sig. = ifelse(pv < 0.01, "***",
ifelse(pv < 0.05, "**",
ifelse(pv < 0.10, "*", "")))
)
}
nw_table <- bind_rows(
nw_test(lambdas$lambda_0, "Intercept (λ₀)"),
nw_test(lambdas$lambda_HML, "Value Premium (λ_HML)"),
nw_test(lambdas$lambda_MKT, "Market Premium (λ_MKT)"),
nw_test(lambdas$lambda_SMB, "Size Premium (λ_SMB)")
)
kable(nw_table, digits = 5,
caption = "Fama-MacBeth with Newey-West Standard Errors (6 lags)")| Factor | Mean λ | NW Std. Error | NW t-Stat | p-Value | Sig. |
|---|---|---|---|---|---|
| Intercept (λ₀) | 0.00060 | 0.00095 | 0.63171 | 0.52769 | |
| Value Premium (λ_HML) | -0.00047 | 0.00220 | -0.21198 | 0.83216 | |
| Market Premium (λ_MKT) | -0.00041 | 0.00101 | -0.40985 | 0.68198 | |
| Size Premium (λ_SMB) | 0.00368 | 0.00295 | 1.24658 | 0.21278 |
## Newey-West SE with 6 lags. *** p<0.01 ** p<0.05 * p<0.10
# Combine both tables for comparison
fm_comp <- fm_table %>%
filter(Factor != "Intercept (λ₀)") %>%
select(Factor, `Mean λ`, SE = `Std. Error`) %>%
mutate(Method = "Standard FM")
nw_comp <- nw_table %>%
filter(Factor != "Intercept (λ₀)") %>%
select(Factor, `Mean λ`, SE = `NW Std. Error`) %>%
mutate(Method = "Newey-West")
bind_rows(fm_comp, nw_comp) %>%
mutate(
ci_lo = `Mean λ` - 1.96 * SE,
ci_hi = `Mean λ` + 1.96 * SE
) %>%
ggplot(aes(x = Factor, y = `Mean λ`, colour = Method,
group = Method)) +
geom_point(position = position_dodge(0.4), size = 3) +
geom_errorbar(aes(ymin = ci_lo, ymax = ci_hi),
position = position_dodge(0.4),
width = 0.2, linewidth = 0.8) +
geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
scale_colour_manual(values = c("Standard FM" = "#4575b4",
"Newey-West" = "#d73027")) +
labs(title = "Comparison: Standard FM vs Newey-West Standard Errors",
subtitle = "Error bars = 95% confidence interval",
x = NULL, y = "Mean λ", colour = "Method") +
theme_minimal(base_size = 11) +
theme(legend.position = "bottom")| Factor | Theory | Expected Sign |
|---|---|---|
| Market (MKT) | Higher market β → higher expected return (CAPM) | Positive (+) |
| Size (SMB) | Smaller firms (high SMB β) earn a size premium | Positive (+) |
| Value (HML) | Value firms (high HML β) earn a value premium | Positive (+) |
| Factor | Mean λ | NW t-Stat | p-Value | Result |
|---|---|---|---|---|
| Value Premium (λ_HML) | -0.00047 | -0.21198 | 0.83216 | Not statistically significant |
| Market Premium (λ_MKT) | -0.00041 | -0.40985 | 0.68198 | Not statistically significant |
| Size Premium (λ_SMB) | 0.00368 | 1.24658 | 0.21278 | Not statistically significant |
Key points: