###################################
#  Fama-MacBeth Regression in R 
###################################

# Provides standard errors corrected for cross sectional correlation
# Better when we have more cross-sections but less time series data
# 
# Two step procedure:
# Step0: Perform Time Series Regression (N Time series Regression)
# Regress portfolio/asset's return on risk factors and obtain the regression coefficients
# 
# Step1: Perform Cross-sectional Regression (T cross-sectional regression)
# Regress portfolio/asset's return on the regression coefficients obtained in Step0
# 
# Step2: Average the Coefficients
# Take average of coefficients obtained from Step2
# 
# cannot be used with cross-sectional in-variant variables

#######################################################
# 
#######################################################


library(broom)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data <- read.csv("data.csv")


# Estimate N Time series Regression

step0 <- data %>% 
  nest(data = c(date,ri,MKT,SMB,HML)) %>% 
  mutate(estimates = map(
    data,
    ~tidy(lm(ri ~ MKT + SMB + HML, data = .x))
  )) %>% 
  unnest(estimates) %>% 
  select(symbol, estimate, term) %>% 
  pivot_wider(names_from  = term,
              values_from = estimate) %>% 
  select(symbol, 
         b_MKT = MKT, 
         b_HML = HML, 
         b_SMB = SMB)


step0 <- data %>% 
  left_join(step0, by = "symbol")


# Estimate T Cross-sectional regression

step1 <- step0 %>% 
    nest(data = c(symbol, ri, b_MKT, b_SMB, b_HML)) %>% 
  mutate(estimates = map(
    data,
    ~tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = .x))
  )) %>%
  unnest(estimates) %>% 
  select(date, estimate, term) %>% 
  pivot_wider(names_from  = term,
              values_from = estimate) %>% 
  select(date, b_MKT, b_HML, b_SMB)


# Step2: Estimate time series averages
t.test(step1$b_MKT, mu=0)
## 
##  One Sample t-test
## 
## data:  step1$b_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.002546371  0.001722208
## sample estimates:
##     mean of x 
## -0.0004120813
t.test(step1$b_SMB, mu=0)
## 
##  One Sample t-test
## 
## data:  step1$b_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.003711466  0.011076953
## sample estimates:
##   mean of x 
## 0.003682744
t.test(step1$b_HML, mu=0)
## 
##  One Sample t-test
## 
## data:  step1$b_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.005541205  0.004607776
## sample estimates:
##     mean of x 
## -0.0004667146