1 Overview

The Fama-MacBeth (1973) procedure estimates risk premia for systematic factors by correcting standard errors for cross-sectional correlation. It is the standard approach when we have more cross-sections than time periods.

Two-step procedure:

Step Action Output
Step 0 N time-series regressions (one per asset) Factor betas \(\hat{\beta}_i\)
Step 1 T cross-sectional regressions (one per day) Daily premia \(\hat{\lambda}_t\)
Step 2 Average premia + t-test vs. \(\mu = 0\) Risk-premium inference

2 Setup

library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(ggplot2)
library(knitr)

theme_set(
  theme_minimal(base_size = 12) +
    theme(
      plot.title       = element_text(face = "bold", size = 13),
      plot.subtitle    = element_text(colour = "grey50", size = 11),
      panel.grid.minor = element_blank(),
      axis.title       = element_text(size = 11),
      legend.position  = "bottom"
    )
)
data <- read.csv("data.csv")

# Rename tickers to full company names
data <- data %>%
  mutate(company = recode(symbol,
    "AAPL" = "Apple Inc.",
    "FORD" = "Ford Motor Co.",
    "GE"   = "General Electric",
    "GM"   = "General Motors",
    "IBM"  = "IBM Corp.",
    "MSFT" = "Microsoft Corp."
  ))

glimpse(data)
## Rows: 7,542
## Columns: 7
## $ symbol  <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL"…
## $ date    <chr> "4-Jan-11", "5-Jan-11", "6-Jan-11", "7-Jan-11", "10-Jan-11", "…
## $ ri      <dbl> 0.0052062641, 0.0081462879, -0.0008082435, 0.0071360567, 0.018…
## $ MKT     <dbl> -0.0013138901, 0.0049946699, -0.0021252276, -0.0018465050, -0.…
## $ SMB     <dbl> -0.0065, 0.0018, 0.0001, 0.0022, 0.0041, 0.0016, 0.0031, -0.00…
## $ HML     <dbl> 0.0008, 0.0013, -0.0025, -0.0006, 0.0039, 0.0036, 0.0000, -0.0…
## $ company <chr> "Apple Inc.", "Apple Inc.", "Apple Inc.", "Apple Inc.", "Apple…
data %>%
  group_by(company, symbol) %>%
  summarise(
    N          = n(),
    Mean_ri    = round(mean(ri),    6),
    SD_ri      = round(sd(ri),      6),
    Mean_MKT   = round(mean(MKT),   6),
    Mean_SMB   = round(mean(SMB),   6),
    Mean_HML   = round(mean(HML),   6),
    .groups = "drop"
  ) %>%
  kable(caption = "Descriptive statistics by company")
Descriptive statistics by company
company symbol N Mean_ri SD_ri Mean_MKT Mean_SMB Mean_HML
Apple Inc. AAPL 1257 0.000697 0.016804 0.000377 2e-06 0.00013
Ford Motor Co. FORD 1257 -0.000583 0.055493 0.000377 2e-06 0.00013
General Electric GE 1257 0.000561 0.013450 0.000377 2e-06 0.00013
General Motors GM 1257 -0.000008 0.018947 0.000377 2e-06 0.00013
IBM Corp. IBM 1257 -0.000055 0.012213 0.000377 2e-06 0.00013
Microsoft Corp. MSFT 1257 0.000654 0.014791 0.000377 2e-06 0.00013

3 Step 0 — Time-Series Regressions

For each of the N = 6 companies, regress daily excess returns \(r_{i,t}\) on the three Fama-French factors:

\[r_{i,t} = \alpha_i + \beta_i^{MKT} \cdot MKT_t + \beta_i^{SMB} \cdot SMB_t + \beta_i^{HML} \cdot HML_t + \varepsilon_{i,t}\]

step0 <- data %>%
  nest(data = c(date, ri, MKT, SMB, HML)) %>%
  mutate(
    model     = map(data, ~ lm(ri ~ MKT + SMB + HML, data = .x)),
    estimates = map(model, tidy),
    glance_   = map(model, glance)
  ) %>%
  unnest(estimates) %>%
  select(symbol, company, estimate, term) %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  rename(
    alpha = `(Intercept)`,
    b_MKT = MKT,
    b_SMB = SMB,
    b_HML = HML
  )

# Extract R-squared separately
r2_df <- data %>%
  nest(data = c(date, ri, MKT, SMB, HML)) %>%
  mutate(r2 = map_dbl(data, ~ summary(lm(ri ~ MKT + SMB + HML, data = .x))$r.squared)) %>%
  select(symbol, r2)

step0 <- step0 %>% left_join(r2_df, by = "symbol")

step0 %>%
  select(company, symbol, alpha, b_MKT, b_SMB, b_HML, r2) %>%
  mutate(across(where(is.numeric), ~ round(.x, 4))) %>%
  rename(
    Company  = company,
    Ticker   = symbol,
    Alpha    = alpha,
    `β MKT`  = b_MKT,
    `β SMB`  = b_SMB,
    `β HML`  = b_HML,
    ``     = r2
  ) %>%
  kable(caption = "Step 0: Estimated factor betas per company")
Step 0: Estimated factor betas per company
Company Ticker Alpha β MKT β SMB β HML
Apple Inc. AAPL 4e-04 0.9000 0.0685 -0.0578 0.2729
Ford Motor Co. FORD -8e-04 0.5129 -0.2644 0.1380 0.0090
General Electric GE 1e-04 1.0779 0.0994 0.0902 0.6125
General Motors GM -5e-04 1.2854 0.0039 -0.0222 0.4379
IBM Corp. IBM -4e-04 0.8169 0.0336 -0.0121 0.4255
Microsoft Corp. MSFT 3e-04 0.9656 0.0582 -0.0641 0.4054

3.1 Beta visualisation

step0 %>%
  select(company, b_MKT, b_SMB, b_HML) %>%
  pivot_longer(cols = starts_with("b_"),
               names_to  = "factor",
               values_to = "beta") %>%
  mutate(
    factor  = recode(factor,
                     b_MKT = "β MKT", b_SMB = "β SMB", b_HML = "β HML"),
    company = stringr::str_wrap(company, 12)
  ) %>%
  ggplot(aes(x = company, y = beta, fill = factor)) +
  geom_col(position = position_dodge(0.75), width = 0.65) +
  geom_hline(yintercept = 0, linewidth = 0.4, colour = "grey40") +
  scale_fill_manual(
    values = c("β MKT" = "#185FA5", "β SMB" = "#0F6E56", "β HML" = "#993C1D"),
    name = NULL
  ) +
  labs(
    title    = "Factor betas by company",
    subtitle = "Estimated from full-sample time-series regressions",
    x = NULL, y = "Beta coefficient"
  )


4 Step 1 — Cross-Sectional Regressions

Merge betas back onto the panel, then for each of the T = 1257 trading days run a cross-sectional regression of returns on betas:

\[r_{i,t} = \lambda_t^0 + \lambda_t^{MKT} \hat{\beta}_i^{MKT} + \lambda_t^{SMB} \hat{\beta}_i^{SMB} + \lambda_t^{HML} \hat{\beta}_i^{HML} + u_{i,t}\]

# Merge betas onto full panel
panel <- data %>%
  left_join(step0 %>% select(symbol, b_MKT, b_SMB, b_HML), by = "symbol")

# Run cross-sectional regression for every date
step1 <- panel %>%
  nest(data = c(symbol, company, ri, b_MKT, b_SMB, b_HML)) %>%
  mutate(
    estimates = map(data, ~ tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = .x)))
  ) %>%
  unnest(estimates) %>%
  select(date, estimate, term) %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  rename(
    intercept  = `(Intercept)`,
    lambda_MKT = b_MKT,
    lambda_SMB = b_SMB,
    lambda_HML = b_HML
  )

head(step1, 10) %>%
  mutate(across(where(is.numeric), ~ round(.x, 6))) %>%
  kable(caption = "Step 1: First 10 daily cross-sectional premia (λ̂_t)")
Step 1: First 10 daily cross-sectional premia (λ̂_t)
date intercept lambda_MKT lambda_SMB lambda_HML
4-Jan-11 -0.029761 0.041629 -0.025520 0.057372
5-Jan-11 0.022334 -0.011347 -0.158046 0.062847
6-Jan-11 -0.029243 0.037301 0.007029 -0.173234
7-Jan-11 -0.017503 0.012722 0.032269 -0.064226
10-Jan-11 0.036414 -0.036631 0.017123 0.058646
11-Jan-11 0.002744 0.004089 -0.095361 0.089858
12-Jan-11 0.072331 -0.055365 -0.164496 0.043036
13-Jan-11 0.015027 -0.019357 0.001815 0.025630
14-Jan-11 0.019775 -0.016486 0.063259 0.039214
18-Jan-11 -0.018911 0.010146 0.052508 -0.090027

4.1 Distribution of daily premia

step1 %>%
  select(date, lambda_MKT, lambda_SMB, lambda_HML) %>%
  pivot_longer(cols = starts_with("lambda_"),
               names_to  = "factor",
               values_to = "lambda") %>%
  mutate(factor = recode(factor,
    lambda_MKT = "λ MKT", lambda_SMB = "λ SMB", lambda_HML = "λ HML")) %>%
  ggplot(aes(x = lambda, fill = factor, colour = factor)) +
  geom_histogram(bins = 50, alpha = 0.55, position = "identity") +
  geom_vline(xintercept = 0, linewidth = 0.5, colour = "grey30", linetype = "dashed") +
  scale_fill_manual(
    values = c("λ MKT" = "#185FA5", "λ SMB" = "#0F6E56", "λ HML" = "#993C1D"),
    name = NULL
  ) +
  scale_colour_manual(
    values = c("λ MKT" = "#185FA5", "λ SMB" = "#0F6E56", "λ HML" = "#993C1D"),
    name = NULL
  ) +
  facet_wrap(~ factor, scales = "free_x") +
  labs(
    title    = "Distribution of daily cross-sectional risk premia",
    subtitle = "Each observation is one trading day's estimated λ̂_t",
    x = "Daily premium (λ)", y = "Count"
  ) +
  theme(legend.position = "none")


5 Step 2 — Fama-MacBeth Inference

Average each daily premium series and test \(H_0: \bar{\lambda} = 0\) via a one-sample t-test. This is exactly the t.test(step1$b_MKT, mu = 0) call from the original R script.

mkt_test <- t.test(step1$lambda_MKT, mu = 0)
smb_test <- t.test(step1$lambda_SMB, mu = 0)
hml_test <- t.test(step1$lambda_HML, mu = 0)

mkt_test
## 
##  One Sample t-test
## 
## data:  step1$lambda_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.002546371  0.001722208
## sample estimates:
##     mean of x 
## -0.0004120813
smb_test
## 
##  One Sample t-test
## 
## data:  step1$lambda_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.003711466  0.011076953
## sample estimates:
##   mean of x 
## 0.003682744
hml_test
## 
##  One Sample t-test
## 
## data:  step1$lambda_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.005541205  0.004607776
## sample estimates:
##     mean of x 
## -0.0004667146

5.1 Summary table

fm_results <- tibble(
  Factor      = c("Market (λ MKT)", "Size (λ SMB)", "Value (λ HML)"),
  `Mean λ`    = c(mean(step1$lambda_MKT), mean(step1$lambda_SMB), mean(step1$lambda_HML)),
  `Std Dev`   = c(sd(step1$lambda_MKT),   sd(step1$lambda_SMB),   sd(step1$lambda_HML)),
  `Std Error` = c(mkt_test$stderr,        smb_test$stderr,        hml_test$stderr),
  `t-stat`    = c(mkt_test$statistic,     smb_test$statistic,     hml_test$statistic),
  `p-value`   = c(mkt_test$p.value,       smb_test$p.value,       hml_test$p.value),
  `CI lower`  = c(mkt_test$conf.int[1],   smb_test$conf.int[1],   hml_test$conf.int[1]),
  `CI upper`  = c(mkt_test$conf.int[2],   smb_test$conf.int[2],   hml_test$conf.int[2]),
  Significant = c(
    ifelse(mkt_test$p.value < 0.05, "Yes ✓", "No"),
    ifelse(smb_test$p.value < 0.05, "Yes ✓", "No"),
    ifelse(hml_test$p.value < 0.05, "Yes ✓", "No")
  )
) %>%
  mutate(across(where(is.numeric), ~ round(.x, 6)))

kable(fm_results,
      caption = "Fama-MacBeth results — one-sample t-tests vs. μ = 0 (95% confidence)")
Fama-MacBeth results — one-sample t-tests vs. μ = 0 (95% confidence)
Factor Mean λ Std Dev Std Error t-stat p-value CI lower CI upper Significant
Market (λ MKT) -0.000412 0.038570 0.001088 -0.378788 0.704909 -0.002546 0.001722 No
Size (λ SMB) 0.003683 0.133626 0.003769 0.977117 0.328699 -0.003711 0.011077 No
Value (λ HML) -0.000467 0.091705 0.002587 -0.180437 0.856839 -0.005541 0.004608 No

5.2 Coefficient plot

tibble(
  factor  = c("λ MKT", "λ SMB", "λ HML"),
  mean    = c(mean(step1$lambda_MKT), mean(step1$lambda_SMB), mean(step1$lambda_HML)),
  ci_lo   = c(mkt_test$conf.int[1],  smb_test$conf.int[1],  hml_test$conf.int[1]),
  ci_hi   = c(mkt_test$conf.int[2],  smb_test$conf.int[2],  hml_test$conf.int[2]),
  sig     = c(mkt_test$p.value < 0.05, smb_test$p.value < 0.05, hml_test$p.value < 0.05)
) %>%
  ggplot(aes(x = factor, y = mean, colour = factor)) +
  geom_hline(yintercept = 0, linewidth = 0.5, colour = "grey40", linetype = "dashed") +
  geom_errorbar(aes(ymin = ci_lo, ymax = ci_hi), width = 0.15, linewidth = 1) +
  geom_point(size = 4) +
  scale_colour_manual(
    values = c("λ MKT" = "#185FA5", "λ SMB" = "#0F6E56", "λ HML" = "#993C1D"),
    guide = "none"
  ) +
  labs(
    title    = "Fama-MacBeth risk premia with 95% confidence intervals",
    subtitle = "Dashed line = 0 (null hypothesis); error bars = 95% CI from t-test",
    x = NULL, y = "Average daily premium (λ̄)"
  )


6 Interpretation

## Market factor (λ MKT):  mean = -0.000412,  t = -0.3788,  p = 0.7049  →  Not significant at 5%
## Size factor   (λ SMB):  mean = 0.003683,  t = 0.9771,  p = 0.3287  →  Not significant at 5%
## Value factor  (λ HML):  mean = -0.000467,  t = -0.1804,  p = 0.8568  →  Not significant at 5%

Key takeaways:

  • None of the three Fama-French factors command a statistically significant daily risk premium in this sample (all p-values > 0.05).
  • This is expected with only N = 6 cross-sections: Fama-MacBeth requires a large cross-section to precisely identify risk premia. The original Fama-French studies used hundreds of portfolios.
  • The standard error correction is the main advantage of Fama-MacBeth over pooled OLS — it accounts for cross-sectional correlation in residuals across firms on the same day.

Methodology: Fama, E.F. & MacBeth, J.D. (1973). Risk, return, and equilibrium: Empirical tests. Journal of Political Economy, 81(3), 607–636.