1 Overview

The Fama-MacBeth (1973) procedure estimates risk premia for systematic factors by correcting standard errors for cross-sectional correlation. It is the standard approach when we have more cross-sections than time periods.

Two-step procedure:

Step	Action	Output
Step 0	N time-series regressions (one per asset)	Factor betas $\hat{\beta}_i$
Step 1	T cross-sectional regressions (one per day)	Daily premia $\hat{\lambda}_t$
Step 2	Average premia + t-test vs. $\mu = 0$	Risk-premium inference

2 Setup

library(dplyr)
library(tidyr)
library(purrr)
library(broom)
library(ggplot2)
library(knitr)

theme_set(
  theme_minimal(base_size = 12) +
    theme(
      plot.title       = element_text(face = "bold", size = 13),
      plot.subtitle    = element_text(colour = "grey50", size = 11),
      panel.grid.minor = element_blank(),
      axis.title       = element_text(size = 11),
      legend.position  = "bottom"
    )
)

data <- read.csv("data.csv")

# Rename tickers to full company names
data <- data %>%
  mutate(company = recode(symbol,
    "AAPL" = "Apple Inc.",
    "FORD" = "Ford Motor Co.",
    "GE"   = "General Electric",
    "GM"   = "General Motors",
    "IBM"  = "IBM Corp.",
    "MSFT" = "Microsoft Corp."
  ))

glimpse(data)

## Rows: 7,542
## Columns: 7
## $ symbol  <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL"…
## $ date    <chr> "4-Jan-11", "5-Jan-11", "6-Jan-11", "7-Jan-11", "10-Jan-11", "…
## $ ri      <dbl> 0.0052062641, 0.0081462879, -0.0008082435, 0.0071360567, 0.018…
## $ MKT     <dbl> -0.0013138901, 0.0049946699, -0.0021252276, -0.0018465050, -0.…
## $ SMB     <dbl> -0.0065, 0.0018, 0.0001, 0.0022, 0.0041, 0.0016, 0.0031, -0.00…
## $ HML     <dbl> 0.0008, 0.0013, -0.0025, -0.0006, 0.0039, 0.0036, 0.0000, -0.0…
## $ company <chr> "Apple Inc.", "Apple Inc.", "Apple Inc.", "Apple Inc.", "Apple…

data %>%
  group_by(company, symbol) %>%
  summarise(
    N          = n(),
    Mean_ri    = round(mean(ri),    6),
    SD_ri      = round(sd(ri),      6),
    Mean_MKT   = round(mean(MKT),   6),
    Mean_SMB   = round(mean(SMB),   6),
    Mean_HML   = round(mean(HML),   6),
    .groups = "drop"
  ) %>%
  kable(caption = "Descriptive statistics by company")

Descriptive statistics by company
company	symbol	N	Mean_ri	SD_ri	Mean_MKT	Mean_SMB	Mean_HML
Apple Inc.	AAPL	1257	0.000697	0.016804	0.000377	2e-06	0.00013
Ford Motor Co.	FORD	1257	-0.000583	0.055493	0.000377	2e-06	0.00013
General Electric	GE	1257	0.000561	0.013450	0.000377	2e-06	0.00013
General Motors	GM	1257	-0.000008	0.018947	0.000377	2e-06	0.00013
IBM Corp.	IBM	1257	-0.000055	0.012213	0.000377	2e-06	0.00013
Microsoft Corp.	MSFT	1257	0.000654	0.014791	0.000377	2e-06	0.00013

3 Step 0 — Time-Series Regressions

For each of the N = 6 companies, regress daily excess returns $r_{i,t}$ on the three Fama-French factors:

\[r_{i,t} = \alpha_i + \beta_i^{MKT} \cdot MKT_t + \beta_i^{SMB} \cdot SMB_t + \beta_i^{HML} \cdot HML_t + \varepsilon_{i,t}\]

step0 <- data %>%
  nest(data = c(date, ri, MKT, SMB, HML)) %>%
  mutate(
    model     = map(data, ~ lm(ri ~ MKT + SMB + HML, data = .x)),
    estimates = map(model, tidy),
    glance_   = map(model, glance)
  ) %>%
  unnest(estimates) %>%
  select(symbol, company, estimate, term) %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  rename(
    alpha = `(Intercept)`,
    b_MKT = MKT,
    b_SMB = SMB,
    b_HML = HML
  )

# Extract R-squared separately
r2_df <- data %>%
  nest(data = c(date, ri, MKT, SMB, HML)) %>%
  mutate(r2 = map_dbl(data, ~ summary(lm(ri ~ MKT + SMB + HML, data = .x))$r.squared)) %>%
  select(symbol, r2)

step0 <- step0 %>% left_join(r2_df, by = "symbol")

step0 %>%
  select(company, symbol, alpha, b_MKT, b_SMB, b_HML, r2) %>%
  mutate(across(where(is.numeric), ~ round(.x, 4))) %>%
  rename(
    Company  = company,
    Ticker   = symbol,
    Alpha    = alpha,
    `β MKT`  = b_MKT,
    `β SMB`  = b_SMB,
    `β HML`  = b_HML,
    `R²`     = r2
  ) %>%
  kable(caption = "Step 0: Estimated factor betas per company")

Step 0: Estimated factor betas per company
Company	Ticker	Alpha	β MKT	β SMB	β HML	R²
Apple Inc.	AAPL	4e-04	0.9000	0.0685	-0.0578	0.2729
Ford Motor Co.	FORD	-8e-04	0.5129	-0.2644	0.1380	0.0090
General Electric	GE	1e-04	1.0779	0.0994	0.0902	0.6125
General Motors	GM	-5e-04	1.2854	0.0039	-0.0222	0.4379
IBM Corp.	IBM	-4e-04	0.8169	0.0336	-0.0121	0.4255
Microsoft Corp.	MSFT	3e-04	0.9656	0.0582	-0.0641	0.4054

3.1 Beta visualisation

step0 %>%
  select(company, b_MKT, b_SMB, b_HML) %>%
  pivot_longer(cols = starts_with("b_"),
               names_to  = "factor",
               values_to = "beta") %>%
  mutate(
    factor  = recode(factor,
                     b_MKT = "β MKT", b_SMB = "β SMB", b_HML = "β HML"),
    company = stringr::str_wrap(company, 12)
  ) %>%
  ggplot(aes(x = company, y = beta, fill = factor)) +
  geom_col(position = position_dodge(0.75), width = 0.65) +
  geom_hline(yintercept = 0, linewidth = 0.4, colour = "grey40") +
  scale_fill_manual(
    values = c("β MKT" = "#185FA5", "β SMB" = "#0F6E56", "β HML" = "#993C1D"),
    name = NULL
  ) +
  labs(
    title    = "Factor betas by company",
    subtitle = "Estimated from full-sample time-series regressions",
    x = NULL, y = "Beta coefficient"
  )

4 Step 1 — Cross-Sectional Regressions

Merge betas back onto the panel, then for each of the T = 1257 trading days run a cross-sectional regression of returns on betas:

\[r_{i,t} = \lambda_t^0 + \lambda_t^{MKT} \hat{\beta}_i^{MKT} + \lambda_t^{SMB} \hat{\beta}_i^{SMB} + \lambda_t^{HML} \hat{\beta}_i^{HML} + u_{i,t}\]

# Merge betas onto full panel
panel <- data %>%
  left_join(step0 %>% select(symbol, b_MKT, b_SMB, b_HML), by = "symbol")

# Run cross-sectional regression for every date
step1 <- panel %>%
  nest(data = c(symbol, company, ri, b_MKT, b_SMB, b_HML)) %>%
  mutate(
    estimates = map(data, ~ tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = .x)))
  ) %>%
  unnest(estimates) %>%
  select(date, estimate, term) %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  rename(
    intercept  = `(Intercept)`,
    lambda_MKT = b_MKT,
    lambda_SMB = b_SMB,
    lambda_HML = b_HML
  )

head(step1, 10) %>%
  mutate(across(where(is.numeric), ~ round(.x, 6))) %>%
  kable(caption = "Step 1: First 10 daily cross-sectional premia (λ̂_t)")

Step 1: First 10 daily cross-sectional premia (λ̂_t)
date	intercept	lambda_MKT	lambda_SMB	lambda_HML
4-Jan-11	-0.029761	0.041629	-0.025520	0.057372
5-Jan-11	0.022334	-0.011347	-0.158046	0.062847
6-Jan-11	-0.029243	0.037301	0.007029	-0.173234
7-Jan-11	-0.017503	0.012722	0.032269	-0.064226
10-Jan-11	0.036414	-0.036631	0.017123	0.058646
11-Jan-11	0.002744	0.004089	-0.095361	0.089858
12-Jan-11	0.072331	-0.055365	-0.164496	0.043036
13-Jan-11	0.015027	-0.019357	0.001815	0.025630
14-Jan-11	0.019775	-0.016486	0.063259	0.039214
18-Jan-11	-0.018911	0.010146	0.052508	-0.090027

4.1 Distribution of daily premia

step1 %>%
  select(date, lambda_MKT, lambda_SMB, lambda_HML) %>%
  pivot_longer(cols = starts_with("lambda_"),
               names_to  = "factor",
               values_to = "lambda") %>%
  mutate(factor = recode(factor,
    lambda_MKT = "λ MKT", lambda_SMB = "λ SMB", lambda_HML = "λ HML")) %>%
  ggplot(aes(x = lambda, fill = factor, colour = factor)) +
  geom_histogram(bins = 50, alpha = 0.55, position = "identity") +
  geom_vline(xintercept = 0, linewidth = 0.5, colour = "grey30", linetype = "dashed") +
  scale_fill_manual(
    values = c("λ MKT" = "#185FA5", "λ SMB" = "#0F6E56", "λ HML" = "#993C1D"),
    name = NULL
  ) +
  scale_colour_manual(
    values = c("λ MKT" = "#185FA5", "λ SMB" = "#0F6E56", "λ HML" = "#993C1D"),
    name = NULL
  ) +
  facet_wrap(~ factor, scales = "free_x") +
  labs(
    title    = "Distribution of daily cross-sectional risk premia",
    subtitle = "Each observation is one trading day's estimated λ̂_t",
    x = "Daily premium (λ)", y = "Count"
  ) +
  theme(legend.position = "none")

5 Step 2 — Fama-MacBeth Inference

Average each daily premium series and test $H_0: \bar{\lambda} = 0$ via a one-sample t-test. This is exactly the t.test(step1$b_MKT, mu = 0) call from the original R script.

mkt_test <- t.test(step1$lambda_MKT, mu = 0)
smb_test <- t.test(step1$lambda_SMB, mu = 0)
hml_test <- t.test(step1$lambda_HML, mu = 0)

mkt_test

## 
##  One Sample t-test
## 
## data:  step1$lambda_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.002546371  0.001722208
## sample estimates:
##     mean of x 
## -0.0004120813

smb_test

## 
##  One Sample t-test
## 
## data:  step1$lambda_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.003711466  0.011076953
## sample estimates:
##   mean of x 
## 0.003682744

hml_test

## 
##  One Sample t-test
## 
## data:  step1$lambda_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.005541205  0.004607776
## sample estimates:
##     mean of x 
## -0.0004667146

5.1 Summary table

fm_results <- tibble(
  Factor      = c("Market (λ MKT)", "Size (λ SMB)", "Value (λ HML)"),
  `Mean λ`    = c(mean(step1$lambda_MKT), mean(step1$lambda_SMB), mean(step1$lambda_HML)),
  `Std Dev`   = c(sd(step1$lambda_MKT),   sd(step1$lambda_SMB),   sd(step1$lambda_HML)),
  `Std Error` = c(mkt_test$stderr,        smb_test$stderr,        hml_test$stderr),
  `t-stat`    = c(mkt_test$statistic,     smb_test$statistic,     hml_test$statistic),
  `p-value`   = c(mkt_test$p.value,       smb_test$p.value,       hml_test$p.value),
  `CI lower`  = c(mkt_test$conf.int[1],   smb_test$conf.int[1],   hml_test$conf.int[1]),
  `CI upper`  = c(mkt_test$conf.int[2],   smb_test$conf.int[2],   hml_test$conf.int[2]),
  Significant = c(
    ifelse(mkt_test$p.value < 0.05, "Yes ✓", "No"),
    ifelse(smb_test$p.value < 0.05, "Yes ✓", "No"),
    ifelse(hml_test$p.value < 0.05, "Yes ✓", "No")
  )
) %>%
  mutate(across(where(is.numeric), ~ round(.x, 6)))

kable(fm_results,
      caption = "Fama-MacBeth results — one-sample t-tests vs. μ = 0 (95% confidence)")

Fama-MacBeth results — one-sample t-tests vs. μ = 0 (95% confidence)
Factor	Mean λ	Std Dev	Std Error	t-stat	p-value	CI lower	CI upper	Significant
Market (λ MKT)	-0.000412	0.038570	0.001088	-0.378788	0.704909	-0.002546	0.001722	No
Size (λ SMB)	0.003683	0.133626	0.003769	0.977117	0.328699	-0.003711	0.011077	No
Value (λ HML)	-0.000467	0.091705	0.002587	-0.180437	0.856839	-0.005541	0.004608	No

5.2 Coefficient plot

tibble(
  factor  = c("λ MKT", "λ SMB", "λ HML"),
  mean    = c(mean(step1$lambda_MKT), mean(step1$lambda_SMB), mean(step1$lambda_HML)),
  ci_lo   = c(mkt_test$conf.int[1],  smb_test$conf.int[1],  hml_test$conf.int[1]),
  ci_hi   = c(mkt_test$conf.int[2],  smb_test$conf.int[2],  hml_test$conf.int[2]),
  sig     = c(mkt_test$p.value < 0.05, smb_test$p.value < 0.05, hml_test$p.value < 0.05)
) %>%
  ggplot(aes(x = factor, y = mean, colour = factor)) +
  geom_hline(yintercept = 0, linewidth = 0.5, colour = "grey40", linetype = "dashed") +
  geom_errorbar(aes(ymin = ci_lo, ymax = ci_hi), width = 0.15, linewidth = 1) +
  geom_point(size = 4) +
  scale_colour_manual(
    values = c("λ MKT" = "#185FA5", "λ SMB" = "#0F6E56", "λ HML" = "#993C1D"),
    guide = "none"
  ) +
  labs(
    title    = "Fama-MacBeth risk premia with 95% confidence intervals",
    subtitle = "Dashed line = 0 (null hypothesis); error bars = 95% CI from t-test",
    x = NULL, y = "Average daily premium (λ̄)"
  )

6 Interpretation

## Market factor (λ MKT):  mean = -0.000412,  t = -0.3788,  p = 0.7049  →  Not significant at 5%
## Size factor   (λ SMB):  mean = 0.003683,  t = 0.9771,  p = 0.3287  →  Not significant at 5%
## Value factor  (λ HML):  mean = -0.000467,  t = -0.1804,  p = 0.8568  →  Not significant at 5%

Key takeaways:

None of the three Fama-French factors command a statistically significant daily risk premium in this sample (all p-values > 0.05).
This is expected with only N = 6 cross-sections: Fama-MacBeth requires a large cross-section to precisely identify risk premia. The original Fama-French studies used hundreds of portfolios.
The standard error correction is the main advantage of Fama-MacBeth over pooled OLS — it accounts for cross-sectional correlation in residuals across firms on the same day.

Methodology: Fama, E.F. & MacBeth, J.D. (1973). Risk, return, and equilibrium: Empirical tests. Journal of Political Economy, 81(3), 607–636.

Fama-MacBeth Regression in R

Three-Factor Model — Daily Returns 2011–2015

Empirical Asset Pricing

May 19, 2026