Install & Load Packages

# Install packages (only run once — comment out after first run)
# install.packages(c("broom", "tidyverse"))

library(broom)
library(tidyverse)

Load Data

data <- read.csv("~/Downloads/data.csv", stringsAsFactors = FALSE)

# Parse dates — handles formats like "4-Jan-11" and "2011-01-04"
data$date <- as.Date(data$date, tryFormats = c("%d-%b-%y", "%Y-%m-%d", "%m/%d/%Y"))

# Quick preview
glimpse(data)
## Rows: 7,542
## Columns: 6
## $ symbol <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL",…
## $ date   <date> 2011-01-04, 2011-01-05, 2011-01-06, 2011-01-07, 2011-01-10, 20…
## $ ri     <dbl> 0.0052062641, 0.0081462879, -0.0008082435, 0.0071360567, 0.0186…
## $ MKT    <dbl> -0.0013138901, 0.0049946699, -0.0021252276, -0.0018465050, -0.0…
## $ SMB    <dbl> -0.0065, 0.0018, 0.0001, 0.0022, 0.0041, 0.0016, 0.0031, -0.002…
## $ HML    <dbl> 0.0008, 0.0013, -0.0025, -0.0006, 0.0039, 0.0036, 0.0000, -0.00…

Step 0 — Time-Series Regression (Estimate Factor Betas)

For each stock (symbol), estimate the Fama-French three-factor model:

\[r_i = \alpha + \beta_{MKT} \cdot MKT + \beta_{SMB} \cdot SMB + \beta_{HML} \cdot HML + \varepsilon\]

betas <- data %>%
  group_by(symbol) %>%
  nest() %>%
  mutate(
    model       = map(data, ~ lm(ri ~ MKT + SMB + HML, data = .x)),
    tidy_model  = map(model, tidy)
  ) %>%
  unnest(tidy_model) %>%
  # Drop the intercept row; keep only factor betas
  filter(term != "(Intercept)") %>%
  select(symbol, term, estimate) %>%
  pivot_wider(names_from  = term,
              values_from = estimate) %>%
  rename(
    b_MKT = MKT,
    b_SMB = SMB,
    b_HML = HML
  )

head(betas)
## # A tibble: 6 × 4
## # Groups:   symbol [6]
##   symbol b_MKT    b_SMB   b_HML
##   <chr>  <dbl>    <dbl>   <dbl>
## 1 AAPL   0.900  0.0685  -0.0578
## 2 FORD   0.513 -0.264    0.138 
## 3 GE     1.08   0.0994   0.0902
## 4 GM     1.29   0.00390 -0.0222
## 5 IBM    0.817  0.0336  -0.0121
## 6 MSFT   0.966  0.0582  -0.0641
# Merge estimated betas back into the original data
data_with_betas <- data %>%
  left_join(betas, by = "symbol")

Step 1 — Cross-Sectional Regression (Each Date)

For each date \(t\), regress realized returns on the pre-estimated betas:

\[r_{i,t} = \lambda_0 + \lambda_{MKT} \hat{\beta}_{MKT,i} + \lambda_{SMB} \hat{\beta}_{SMB,i} + \lambda_{HML} \hat{\beta}_{HML,i} + \varepsilon_{i,t}\]

risk_premiums <- data_with_betas %>%
  group_by(date) %>%
  nest() %>%
  mutate(
    model      = map(data, ~ lm(ri ~ b_MKT + b_SMB + b_HML, data = .x)),
    tidy_model = map(model, tidy)
  ) %>%
  unnest(tidy_model) %>%
  # Keep only the slope estimates (risk premiums); drop the intercept
  filter(term != "(Intercept)") %>%
  select(date, term, estimate) %>%
  pivot_wider(names_from  = term,
              values_from = estimate) %>%
  rename(
    lambda_MKT = b_MKT,
    lambda_SMB = b_SMB,
    lambda_HML = b_HML
  )

head(risk_premiums)
## # A tibble: 6 × 4
## # Groups:   date [6]
##   date       lambda_MKT lambda_SMB lambda_HML
##   <date>          <dbl>      <dbl>      <dbl>
## 1 2011-01-04    0.0416    -0.0255      0.0574
## 2 2011-01-05   -0.0113    -0.158       0.0628
## 3 2011-01-06    0.0373     0.00703    -0.173 
## 4 2011-01-07    0.0127     0.0323     -0.0642
## 5 2011-01-10   -0.0366     0.0171      0.0586
## 6 2011-01-11    0.00409   -0.0954      0.0899

Step 2 — Time-Series Averages & Fama-MacBeth t-Tests

Average Risk Premiums

# Select only the lambda columns (drop the date column)
lambda_cols <- risk_premiums %>%
  ungroup() %>%
  select(lambda_MKT, lambda_SMB, lambda_HML)

avg_premiums <- colMeans(lambda_cols, na.rm = TRUE)
print(avg_premiums)
##    lambda_MKT    lambda_SMB    lambda_HML 
## -0.0004120813  0.0036827439 -0.0004667146

T-Tests (H₀: λ = 0)

t_MKT <- t.test(risk_premiums$lambda_MKT, mu = 0)
t_SMB <- t.test(risk_premiums$lambda_SMB, mu = 0)
t_HML <- t.test(risk_premiums$lambda_HML, mu = 0)

print(t_MKT)
## 
##  One Sample t-test
## 
## data:  risk_premiums$lambda_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.002546371  0.001722208
## sample estimates:
##     mean of x 
## -0.0004120813
print(t_SMB)
## 
##  One Sample t-test
## 
## data:  risk_premiums$lambda_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.003711466  0.011076953
## sample estimates:
##   mean of x 
## 0.003682744
print(t_HML)
## 
##  One Sample t-test
## 
## data:  risk_premiums$lambda_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.005541205  0.004607776
## sample estimates:
##     mean of x 
## -0.0004667146

Summary Table

summary_tbl <- tibble(
  Factor    = c("MKT", "SMB", "HML"),
  Avg_Lambda = avg_premiums,
  t_stat    = c(t_MKT$statistic,
                t_SMB$statistic,
                t_HML$statistic),
  p_value   = c(t_MKT$p.value,
                t_SMB$p.value,
                t_HML$p.value),
  Significant = ifelse(
    c(t_MKT$p.value, t_SMB$p.value, t_HML$p.value) < 0.05,
    "Yes", "No"
  )
)

knitr::kable(
  summary_tbl,
  digits  = 4,
  caption = "Fama-MacBeth Risk Premium Estimates"
)
Fama-MacBeth Risk Premium Estimates
Factor Avg_Lambda t_stat p_value Significant
MKT -0.0004 -0.3788 0.7049 No
SMB 0.0037 0.9771 0.3287 No
HML -0.0005 -0.1804 0.8568 No