# Corrected R Code for Fama-MacBeth Replication
# Only run this line ONCE to install necessary packages
# install.packages(c("broom", "tidyverse", "plm", "lmtest", "sandwich"))
library(broom)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plm)
##
## Attaching package: 'plm'
##
## The following objects are masked from 'package:dplyr':
##
## between, lag, lead
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
# Read the input panel from data.csv, resolved against the current working
# directory. The steps below reference the columns symbol, date, ri, MKT,
# SMB and HML.
data <- read.csv(file = "data.csv")
# Step 0: Estimate N time-series regressions (one per symbol).
# For every symbol, regress its return `ri` on the three factors using all of
# that symbol's rows, and keep the three slope estimates as b_MKT, b_HML and
# b_SMB (the intercept is dropped by the final select()).
# The grouping key is stated explicitly with `.by = symbol`: nest() would
# otherwise group by every column that is not listed, so an extra column in
# the CSV (row index, risk-free rate, ...) would silently split one symbol
# into several groups.
symbol_betas <- data %>%
  nest(.by = symbol) %>%
  mutate(estimates = map(
    data,
    function(history) tidy(lm(ri ~ MKT + SMB + HML, data = history))
  )) %>%
  unnest(estimates) %>%
  select(symbol, estimate, term) %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  select(symbol, b_MKT = MKT, b_HML = HML, b_SMB = SMB)
# Merge betas back into original dataset: each row of `data` gains the
# b_MKT, b_HML and b_SMB estimated for its symbol.
step0 <- data %>%
  left_join(symbol_betas, by = "symbol")
# Step 1: Estimate T cross-sectional regressions (one per date).
# For every date, regress the returns `ri` of all symbols on their step-0
# betas. The result has one row per date; its b_MKT, b_HML and b_SMB columns
# hold that date's regression slopes on the corresponding beta (the intercept
# is dropped by the final select()).
# The grouping key is stated explicitly with `.by = date`: nest() would
# otherwise also group by MKT, SMB, HML and any other unlisted column, so a
# within-date mismatch in those values would split a single cross-section.
step1 <- step0 %>%
  nest(.by = date) %>%
  mutate(estimates = map(
    data,
    function(cross_section) {
      tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = cross_section))
    }
  )) %>%
  unnest(estimates) %>%
  select(date, estimate, term) %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  select(date, b_MKT, b_HML, b_SMB)
# Step 2: Estimate time series averages with t-tests
# `step1` has one row per date; its b_MKT, b_SMB and b_HML columns are the
# slopes from that date's cross-sectional regression, not the step-0 betas.
# Each one-sample t-test below checks whether the mean of one slope series
# across dates differs from zero (mu = 0).
t.test(step1$b_MKT, mu = 0)
##
## One Sample t-test
##
## data: step1$b_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.002546371 0.001722208
## sample estimates:
## mean of x
## -0.0004120813
# One-sample t-test of the per-date cross-sectional slopes on b_SMB against
# a mean of zero.
t.test(step1$b_SMB, mu = 0)
##
## One Sample t-test
##
## data: step1$b_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003711466 0.011076953
## sample estimates:
## mean of x
## 0.003682744
# One-sample t-test of the per-date cross-sectional slopes on b_HML against
# a mean of zero.
t.test(step1$b_HML, mu = 0)
##
## One Sample t-test
##
## data: step1$b_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.005541205 0.004607776
## sample estimates:
## mean of x
## -0.0004667146