# Data Analysis
library(broom)
library(tidyverse)
# 1. Import data safely using semicolon delimiter
data <- read_delim("data.csv", delim = ";")
# 2. Clean up data types and handle text-to-number forced conversions
# - as.numeric() turns any value it cannot read as a number into NA (with a
#   warning); those rows are removed by drop_na() below.
#   NOTE(review): if the file uses decimal commas (common in ';'-delimited
#   exports), every such value would become NA here -- confirm the number
#   format of data.csv.
# - as.Date() returns NA for strings that do not match "%d-%b-%y"; matching of
#   the month abbreviation (%b) depends on the session's LC_TIME locale.
# - `date` is included in drop_na() so that rows with an unparseable date do
#   not survive cleaning and end up grouped together as one spurious NA "date"
#   in the later per-date regressions.
data_clean <- data %>%
  mutate(
    across(c(ri, MKT, SMB, HML), as.numeric),
    date = as.Date(date, format = "%d-%b-%y")
  ) %>%
  drop_na(date, ri, MKT, SMB, HML)
# 3. Fama-MacBeth Step 0: Time-Series Regressions (Estimate Betas per Symbol)
# For every symbol, regress ri on MKT, SMB and HML over all of that symbol's
# rows and keep the three slope estimates as b_MKT, b_HML and b_SMB
# (the intercept is estimated but not kept).
#
# Only the columns the regression needs are kept before nesting, and the
# nesting key is stated explicitly as "everything except symbol". Listing the
# nested columns instead would make every other column of data_clean part of
# the key, so an extra column in the input would split a symbol into several
# nests and produce several regressions per symbol.
step0 <- data_clean %>%
  select(symbol, date, ri, MKT, SMB, HML) %>%
  nest(data = -symbol) %>%
  mutate(estimates = map(
    data,
    function(panel) tidy(lm(ri ~ MKT + SMB + HML, data = panel))
  )) %>%
  select(symbol, estimates) %>%
  unnest(estimates) %>%
  select(symbol, term, estimate) %>%
  # One row per symbol, one column per regression term
  pivot_wider(names_from = term, values_from = estimate) %>%
  select(symbol,
         b_MKT = MKT,
         b_HML = HML,
         b_SMB = SMB)
# Attach each symbol's estimated betas (b_MKT, b_HML, b_SMB) to every row of
# the cleaned data; rows whose symbol has no match in step0 get NA betas.
step0_merged <- left_join(data_clean, step0, by = "symbol")
# 4. Fama-MacBeth Step 1: Cross-Sectional Regressions (Estimate Risk Premia per Date)
# For every date, regress ri on the step-0 betas across symbols. The slope on
# each beta is that date's estimate for the corresponding factor; the result
# has one row per date with columns b_MKT, b_HML and b_SMB (the intercept is
# estimated but not kept).
#
# step0_merged still carries the raw factor columns MKT, SMB and HML from
# data_clean. They are dropped here and the nesting key is stated explicitly
# as "everything except date"; otherwise they would silently become part of
# the key, and any date on which they are not identical across symbols would
# be split into several regressions.
step1 <- step0_merged %>%
  select(date, symbol, ri, b_MKT, b_SMB, b_HML) %>%
  nest(data = -date) %>%
  mutate(estimates = map(
    data,
    function(cross_section) {
      tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = cross_section))
    }
  )) %>%
  select(date, estimates) %>%
  unnest(estimates) %>%
  select(date, term, estimate) %>%
  # One row per date, one column per regression term
  pivot_wider(names_from = term, values_from = estimate) %>%
  select(date, b_MKT, b_HML, b_SMB)
# 5. Fama-MacBeth Step 2: Estimate time series averages & Significance
# Each call below runs a two-sided one-sample t-test of H0: mean = 0 on one
# column of step1, i.e. it tests whether the average over dates of the
# per-date estimate differs from zero.
# The `##` lines appear to be console output recorded from an earlier run;
# they are comments and are not regenerated when the script is executed.
# In that recorded output, df = 1256 for every test (1257 per-date estimates
# used), and all three p-values are above 0.05.
#
# Market factor (b_MKT).
# NOTE(review): the recorded mean (-68.15) and confidence interval
# (-256.03 to 119.73) are several orders of magnitude larger than those of
# b_SMB and b_HML below -- worth checking the b_MKT estimates before
# interpreting this result.
t.test(step1$b_MKT, mu = 0)
##
## One Sample t-test
##
## data: step1$b_MKT
## t = -0.71163, df = 1256, p-value = 0.4768
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -256.0301 119.7291
## sample estimates:
## mean of x
## -68.15049
# Size factor (b_SMB).
t.test(step1$b_SMB, mu = 0)
##
## One Sample t-test
##
## data: step1$b_SMB
## t = 1.0107, df = 1256, p-value = 0.3124
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.002876405 0.008989319
## sample estimates:
## mean of x
## 0.003056457
# Value factor (b_HML).
t.test(step1$b_HML, mu = 0)
##
## One Sample t-test
##
## data: step1$b_HML
## t = 0.21531, df = 1256, p-value = 0.8296
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01572488 0.01960192
## sample estimates:
## mean of x
## 0.001938522