Introduction

Fama-MacBeth (1973) regression is a two-pass method widely used in empirical asset pricing to estimate risk premia. It involves:

  1. Step 1 — Time-Series Regressions: For each asset \(i\), regress excess returns on factors to get factor loadings (betas).
  2. Step 2 — Cross-Sectional Regressions: For each time period \(t\), regress cross-sectional returns on the betas from Step 1.
  3. Step 3 — Time-Series Average: Average the T cross-sectional coefficients to get the final risk premium estimates, and use Fama-MacBeth standard errors.

Load Libraries

# Install if needed:
# install.packages(c("tidyverse", "broom", "kableExtra", "ggplot2"))

library(tidyverse)
library(broom)
library(kableExtra)
library(ggplot2)

Step 0: Generate / Load Sample Data

We simulate a panel dataset of 30 stocks over 60 months with:

  • Mkt_RF: Market excess return (factor 1)
  • SMB: Small-Minus-Big factor (factor 2)
  • HML: High-Minus-Low factor (factor 3)
  • Ret: Stock excess return
set.seed(42)

n_stocks <- 30
n_months <- 60
stock_ids <- paste0("Stock_", str_pad(1:n_stocks, 2, pad = "0"))
months     <- seq(as.Date("2019-01-01"), by = "month", length.out = n_months)

# True betas for each stock (randomly assigned)
true_beta_mkt <- runif(n_stocks, 0.5, 1.8)
true_beta_smb <- runif(n_stocks, -0.5, 1.2)
true_beta_hml <- runif(n_stocks, -0.3, 1.0)

# Factor returns (monthly)
Mkt_RF <- rnorm(n_months, mean = 0.005, sd = 0.04)
SMB    <- rnorm(n_months, mean = 0.002, sd = 0.02)
HML    <- rnorm(n_months, mean = 0.003, sd = 0.02)

factors <- tibble(Date = months, Mkt_RF, SMB, HML)

# Build panel
panel <- map_dfr(1:n_stocks, function(i) {
  eps <- rnorm(n_months, 0, 0.03)
  Ret <- 0.001 +
         true_beta_mkt[i] * Mkt_RF +
         true_beta_smb[i] * SMB +
         true_beta_hml[i] * HML +
         eps
  tibble(
    Date   = months,
    Stock  = stock_ids[i],
    Ret    = Ret,
    Mkt_RF = Mkt_RF,
    SMB    = SMB,
    HML    = HML
  )
})

head(panel, 10) %>%
  kable(caption = "Sample Panel Data (first 10 rows)", digits = 4) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Sample Panel Data (first 10 rows)
Date Stock Ret Mkt_RF SMB HML
2019-01-01 Stock_01 0.0549 0.0223 0.0041 0.0316
2019-02-01 Stock_01 -0.0445 -0.0275 -0.0064 -0.0169
2019-03-01 Stock_01 0.1066 0.0628 -0.0004 0.0121
2019-04-01 Stock_01 -0.0324 -0.0123 0.0058 0.0047
2019-05-01 Stock_01 0.1067 0.0312 0.0044 0.0209
2019-06-01 Stock_01 0.0233 0.0179 0.0015 -0.0016
2019-07-01 Stock_01 -0.0005 -0.0264 0.0042 0.0197
2019-08-01 Stock_01 0.0556 0.0680 -0.0077 -0.0319
2019-09-01 Stock_01 0.0541 0.0307 -0.0081 0.0368
2019-10-01 Stock_01 -0.0044 0.0086 -0.0312 0.0203

Step 1: Time-Series Regressions (N Regressions)

For each stock \(i\), estimate:

\[R_{i,t} = \alpha_i + \beta_{i,Mkt} \cdot MktRF_t + \beta_{i,SMB} \cdot SMB_t + \beta_{i,HML} \cdot HML_t + \varepsilon_{i,t}\]

ts_betas <- panel %>%
  group_by(Stock) %>%
  do(tidy(lm(Ret ~ Mkt_RF + SMB + HML, data = .))) %>%
  ungroup() %>%
  filter(term != "(Intercept)") %>%
  select(Stock, term, estimate) %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  rename(
    beta_mkt = Mkt_RF,
    beta_smb = SMB,
    beta_hml = HML
  )

ts_betas %>%
  kable(caption = "Step 1: Estimated Betas from Time-Series Regressions",
        digits = 4) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Step 1: Estimated Betas from Time-Series Regressions
Stock beta_mkt beta_smb beta_hml
Stock_01 1.7447 0.6593 0.2079
Stock_02 1.6424 0.7349 0.9703
Stock_03 0.8398 0.5639 0.5662
Stock_04 1.4259 0.5597 0.3883
Stock_05 1.2341 -0.5386 0.5626
Stock_06 1.1205 0.6327 0.0097
Stock_07 1.7232 -0.3079 0.1718
Stock_08 0.8429 0.0192 0.8399
Stock_09 1.3775 0.9612 0.5917
Stock_10 1.4406 0.7269 -0.1799
Stock_11 0.9668 -0.0182 -0.3153
Stock_12 1.2504 0.4373 0.0757
Stock_13 1.7334 -0.0387 -0.1632
Stock_14 0.7895 0.7222 0.0749
Stock_15 1.0288 0.2734 -0.1800
Stock_16 1.7967 1.4308 0.7670
Stock_17 1.8122 0.5999 -0.2620
Stock_18 0.7380 0.4687 0.1242
Stock_19 0.9294 1.0571 0.0605
Stock_20 1.1569 0.8242 -0.2607
Stock_21 1.4336 0.1210 0.3629
Stock_22 0.6927 -0.2357 0.0573
Stock_23 1.9667 0.0954 0.2307
Stock_24 1.7069 0.7653 0.5022
Stock_25 0.7308 -0.0650 0.5753
Stock_26 0.9496 0.5888 0.0857
Stock_27 1.0941 0.5471 -0.1466
Stock_28 1.7588 -0.0105 0.0124
Stock_29 1.1908 -0.3107 -0.1833
Stock_30 1.5925 0.5303 0.0903

Distribution of Estimated Betas

ts_betas_long <- ts_betas %>%
  pivot_longer(cols = starts_with("beta_"), names_to = "Factor", values_to = "Beta") %>%
  mutate(Factor = recode(Factor,
    "beta_mkt" = "Market Beta",
    "beta_smb" = "SMB Beta",
    "beta_hml" = "HML Beta"
  ))

ggplot(ts_betas_long, aes(x = Beta, fill = Factor)) +
  geom_histogram(bins = 12, color = "white", alpha = 0.85) +
  facet_wrap(~Factor, scales = "free") +
  scale_fill_manual(values = c("#2196F3", "#4CAF50", "#FF5722")) +
  labs(
    title = "Distribution of Estimated Betas Across Stocks",
    subtitle = "Step 1: Time-Series Regression Results",
    x = "Beta Estimate", y = "Count"
  ) +
  theme_minimal(base_size = 13) +
  theme(legend.position = "none",
        plot.title = element_text(face = "bold"))


Step 2: Cross-Sectional Regressions (T Regressions)

Merge betas back into the panel, then for each time period \(t\), run:

\[R_{i,t} = \lambda_{0,t} + \lambda_{Mkt,t} \cdot \hat{\beta}_{i,Mkt} + \lambda_{SMB,t} \cdot \hat{\beta}_{i,SMB} + \lambda_{HML,t} \cdot \hat{\beta}_{i,HML} + \alpha_{i,t}\]

# Merge betas with panel returns
panel_with_betas <- panel %>%
  left_join(ts_betas, by = "Stock")

# Cross-sectional regression for each month
cs_lambdas <- panel_with_betas %>%
  group_by(Date) %>%
  do(tidy(lm(Ret ~ beta_mkt + beta_smb + beta_hml, data = .))) %>%
  ungroup() %>%
  rename(
    lambda    = estimate,
    se_lambda = std.error
  )

# Show first few months
cs_lambdas %>%
  filter(Date <= as.Date("2019-06-01")) %>%
  select(Date, term, lambda, se_lambda, p.value) %>%
  kable(caption = "Step 2: Cross-Sectional Lambda Estimates (first 6 months)",
        digits = 5) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Step 2: Cross-Sectional Lambda Estimates (first 6 months)
Date term lambda se_lambda p.value
2019-01-01 (Intercept) 0.02076 0.01996 0.30789
2019-01-01 beta_mkt 0.00986 0.01479 0.51088
2019-01-01 beta_smb 0.00195 0.01265 0.87860
2019-01-01 beta_hml 0.02888 0.01642 0.09045
2019-02-01 (Intercept) -0.02264 0.02145 0.30101
2019-02-01 beta_mkt -0.01391 0.01590 0.38980
2019-02-01 beta_smb -0.00894 0.01359 0.51641
2019-02-01 beta_hml 0.00114 0.01765 0.94916
2019-03-01 (Intercept) -0.00249 0.02312 0.91490
2019-03-01 beta_mkt 0.05821 0.01714 0.00220
2019-03-01 beta_smb -0.00993 0.01465 0.50385
2019-03-01 beta_hml 0.04551 0.01902 0.02426
2019-04-01 (Intercept) 0.00711 0.02259 0.75544
2019-04-01 beta_mkt -0.01968 0.01674 0.25053
2019-04-01 beta_smb 0.02024 0.01431 0.16914
2019-04-01 beta_hml -0.02143 0.01859 0.25946
2019-05-01 (Intercept) 0.00690 0.02228 0.75918
2019-05-01 beta_mkt 0.01672 0.01651 0.32069
2019-05-01 beta_smb 0.00997 0.01411 0.48607
2019-05-01 beta_hml 0.04227 0.01833 0.02932
2019-06-01 (Intercept) 0.01871 0.02033 0.36585
2019-06-01 beta_mkt 0.00941 0.01507 0.53779
2019-06-01 beta_smb -0.00368 0.01288 0.77727
2019-06-01 beta_hml -0.01558 0.01673 0.36036

Lambda Time Series Plot

cs_lambdas %>%
  filter(term != "(Intercept)") %>%
  mutate(term = recode(term,
    "beta_mkt" = "λ Market",
    "beta_smb" = "λ SMB",
    "beta_hml" = "λ HML"
  )) %>%
  ggplot(aes(x = Date, y = lambda, color = term)) +
  geom_line(size = 0.8) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
  facet_wrap(~term, ncol = 1, scales = "free_y") +
  scale_color_manual(values = c("#2196F3", "#4CAF50", "#FF5722")) +
  labs(
    title = "Step 2: Time Series of Cross-Sectional Lambda Estimates",
    x = "Date", y = "Lambda (Risk Premium)"
  ) +
  theme_minimal(base_size = 12) +
  theme(legend.position = "none",
        plot.title = element_text(face = "bold"))


Step 3: Fama-MacBeth Estimates (Time-Series Average)

Average the \(T\) cross-sectional lambdas. The Fama-MacBeth standard error is:

\[SE_{FM}(\hat{\lambda}) = \frac{s(\hat{\lambda}_t)}{\sqrt{T}}\]

where \(s(\hat{\lambda}_t)\) is the time-series standard deviation of the cross-sectional estimates.

fm_results <- cs_lambdas %>%
  group_by(term) %>%
  summarise(
    FM_Estimate  = mean(lambda),
    FM_Std_Dev   = sd(lambda),
    T            = n(),
    FM_SE        = sd(lambda) / sqrt(n()),
    FM_t_stat    = mean(lambda) / (sd(lambda) / sqrt(n())),
    FM_p_value   = 2 * pt(-abs(mean(lambda) / (sd(lambda) / sqrt(n()))), df = n() - 1)
  ) %>%
  mutate(
    Significance = case_when(
      FM_p_value < 0.01 ~ "***",
      FM_p_value < 0.05 ~ "**",
      FM_p_value < 0.10 ~ "*",
      TRUE              ~ ""
    )
  )

fm_results %>%
  kable(caption = "Step 3: Fama-MacBeth Final Results",
        digits = 5,
        col.names = c("Term", "FM Estimate", "Std Dev", "T", "FM Std Error",
                      "t-Statistic", "p-Value", "Sig.")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "bordered")) %>%
  row_spec(which(fm_results$FM_p_value < 0.05), bold = TRUE, color = "white",
           background = "#2196F3")
Step 3: Fama-MacBeth Final Results
Term FM Estimate Std Dev T FM Std Error t-Statistic p-Value Sig.
(Intercept) -0.00207 0.01567 60 0.00202 -1.02444 0.30981
beta_hml 0.00365 0.02550 60 0.00329 1.10966 0.27165
beta_mkt 0.01305 0.03959 60 0.00511 2.55250 0.01330 **
beta_smb -0.00289 0.02035 60 0.00263 -1.10051 0.27558

Final Summary Table

fm_results %>%
  filter(term != "(Intercept)") %>%
  mutate(
    term = recode(term,
      "beta_mkt" = "Market (MKT-RF)",
      "beta_smb" = "Size (SMB)",
      "beta_hml" = "Value (HML)"
    ),
    Result = paste0(
      round(FM_Estimate, 5), " ", Significance,
      "\n(", round(FM_SE, 5), ")"
    )
  ) %>%
  select(Factor = term, `Risk Premium` = FM_Estimate,
         `FM Std Error` = FM_SE, `t-Statistic` = FM_t_stat,
         `p-Value` = FM_p_value, Sig. = Significance) %>%
  kable(caption = "Fama-MacBeth Risk Premium Estimates (3-Factor Model)",
        digits = 5) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
                full_width = FALSE) %>%
  add_header_above(c(" " = 1, "Fama-MacBeth (1973)" = 5))
Fama-MacBeth Risk Premium Estimates (3-Factor Model)
Fama-MacBeth (1973)
Factor Risk Premium FM Std Error t-Statistic p-Value Sig.
Value (HML) 0.00365 0.00329 1.10966 0.27165
Market (MKT-RF) 0.01305 0.00511 2.55250 0.01330 **
Size (SMB) -0.00289 0.00263 -1.10051 0.27558

Interpretation

  • FM Estimate: The average risk premium for each factor across all cross-sections.
  • FM Std Error: Fama-MacBeth standard error = \(SD(\hat{\lambda}_t) / \sqrt{T}\).
  • t-Statistic: Tests whether the risk premium is significantly different from zero.
  • Significance codes: *** p<0.01, ** p<0.05, * p<0.10

Note: In real applications, you would use actual stock return data (e.g., from CRSP) and Fama-French factor data (available from Kenneth French’s data library).


Replicated from: The Data Hall — Fama-MacBeth Regression in R
Reference: Fama, E. F., & MacBeth, J. D. (1973). Risk, return, and equilibrium: Empirical tests. Journal of Political Economy, 81(3), 607–636.