###################################
# Fama-MacBeth Regression in R
###################################
# Provides standard errors corrected for cross sectional correlation
# Better when we have more cross-sections but less time series data
#
# Two step procedure:
# Step0: Perform Time Series Regression (N Time series Regression)
# Regress portfolio/asset's return on risk factors and obtain the regression coefficients
#
# Step1: Perform Cross-sectional Regression (T cross-sectional regression)
# Regress portfolio/asset's return on the regression coefficients obtained in Step0
#
# Step2: Average the Coefficients
# Take average of coefficients obtained from Step2
#
# cannot be used with cross-sectional in-variant variables
#######################################################
#
#######################################################
library(broom)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data <- read.csv("data.csv")
# Estimate N Time series Regression
step0 <- data %>%
nest(data = c(date,ri,MKT,SMB,HML)) %>%
mutate(estimates = map(
data,
~tidy(lm(ri ~ MKT + SMB + HML, data = .x))
)) %>%
unnest(estimates) %>%
select(symbol, estimate, term) %>%
pivot_wider(names_from = term,
values_from = estimate) %>%
select(symbol,
b_MKT = MKT,
b_HML = HML,
b_SMB = SMB)
step0 <- data %>%
left_join(step0, by = "symbol")
# Estimate T Cross-sectional regression
step1 <- step0 %>%
nest(data = c(symbol, ri, b_MKT, b_SMB, b_HML)) %>%
mutate(estimates = map(
data,
~tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = .x))
)) %>%
unnest(estimates) %>%
select(date, estimate, term) %>%
pivot_wider(names_from = term,
values_from = estimate) %>%
select(date, b_MKT, b_HML, b_SMB)
# Step2: Estimate time series averages
t.test(step1$b_MKT, mu=0)
##
## One Sample t-test
##
## data: step1$b_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.002546371 0.001722208
## sample estimates:
## mean of x
## -0.0004120813
t.test(step1$b_SMB, mu=0)
##
## One Sample t-test
##
## data: step1$b_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003711466 0.011076953
## sample estimates:
## mean of x
## 0.003682744
t.test(step1$b_HML, mu=0)
##
## One Sample t-test
##
## data: step1$b_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.005541205 0.004607776
## sample estimates:
## mean of x
## -0.0004667146