library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.1.5
## ✔ forcats   1.0.1     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
## Warning: package 'broom' was built under R version 4.5.2
# Load the return panel from disk and preview its first rows. The preview
# lists the columns symbol, date, ri, MKT, SMB and HML.
data <- read.csv(file = "data.csv")
head(data, n = 6L)
##   symbol      date            ri          MKT     SMB     HML
## 1   AAPL  4-Jan-11  0.0052062641 -0.001313890 -0.0065  0.0008
## 2   AAPL  5-Jan-11  0.0081462879  0.004994670  0.0018  0.0013
## 3   AAPL  6-Jan-11 -0.0008082435 -0.002125228  0.0001 -0.0025
## 4   AAPL  7-Jan-11  0.0071360567 -0.001846505  0.0022 -0.0006
## 5   AAPL 10-Jan-11  0.0186572890 -0.001377275  0.0041  0.0039
## 6   AAPL 11-Jan-11 -0.0023681840  0.003718222  0.0016  0.0036
# Time-series pass: for every symbol, regress its returns `ri` on the three
# factors (MKT, SMB, HML) and keep the slope estimates as that symbol's
# factor loadings.
step0 <- data %>%
  nest(data = c(date, ri, MKT, SMB, HML)) %>%
  mutate(
    estimates = map(
      data,
      \(returns) tidy(lm(ri ~ MKT + SMB + HML, data = returns))
    )
  ) %>%
  unnest(estimates) %>%
  # Only `symbol` identifies a row; every column other than term/estimate
  # (nested data, std.error, statistic, p.value) is dropped by the pivot.
  pivot_wider(
    id_cols     = symbol,
    names_from  = term,
    values_from = estimate
  ) %>%
  # Drop the intercept column and prefix the slopes with `b_`.
  select(symbol, b_MKT = MKT, b_HML = HML, b_SMB = SMB)

step0
## # A tibble: 6 × 4
##   symbol b_MKT   b_HML    b_SMB
##   <chr>  <dbl>   <dbl>    <dbl>
## 1 AAPL   0.900 -0.0578  0.0685 
## 2 FORD   0.513  0.138  -0.264  
## 3 GE     1.08   0.0902  0.0994 
## 4 GM     1.29  -0.0222  0.00390
## 5 IBM    0.817 -0.0121  0.0336 
## 6 MSFT   0.966 -0.0641  0.0582
# Attach each symbol's estimated loadings (b_MKT, b_HML, b_SMB) to all of its
# rows in the original panel; `step0` is overwritten with the joined table.
step0 <- left_join(data, step0, by = join_by(symbol))
# Cross-sectional pass: for every date, regress the returns of all symbols on
# their estimated loadings. The slope on each loading is that date's estimate
# for the corresponding factor.
step1 <- step0 %>%
  # Group explicitly by date. Nesting only a subset of columns would leave
  # MKT, SMB and HML as extra outer keys, so any difference in a factor value
  # between symbols on the same date would split that date into several
  # degenerate regressions and duplicate `date` keys in the pivot below.
  nest(.by = date) %>%
  mutate(
    estimates = map(
      data,
      \(cross_section) {
        tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = cross_section))
      }
    )
  ) %>%
  unnest(estimates) %>%
  select(date, estimate, term) %>%
  pivot_wider(
    names_from  = term,
    values_from = estimate
  ) %>%
  # Keep the three slopes only; the intercept column is discarded.
  select(date, b_MKT, b_HML, b_SMB)

head(step1)
## # A tibble: 6 × 4
##   date         b_MKT   b_HML    b_SMB
##   <chr>        <dbl>   <dbl>    <dbl>
## 1 4-Jan-11   0.0416   0.0574 -0.0255 
## 2 5-Jan-11  -0.0113   0.0628 -0.158  
## 3 6-Jan-11   0.0373  -0.173   0.00703
## 4 7-Jan-11   0.0127  -0.0642  0.0323 
## 5 10-Jan-11 -0.0366   0.0586  0.0171 
## 6 11-Jan-11  0.00409  0.0899 -0.0954
library(knitr)

# Two-sided one-sample t-test of the per-date MKT slopes against a mean of 0.
t.test(step1$b_MKT, alternative = "two.sided", mu = 0, conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  step1$b_MKT
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.002546371  0.001722208
## sample estimates:
##     mean of x 
## -0.0004120813
# Two-sided one-sample t-test of the per-date SMB slopes against a mean of 0.
t.test(step1$b_SMB, alternative = "two.sided", mu = 0, conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  step1$b_SMB
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.003711466  0.011076953
## sample estimates:
##   mean of x 
## 0.003682744
# Two-sided one-sample t-test of the per-date HML slopes against a mean of 0.
t.test(step1$b_HML, alternative = "two.sided", mu = 0, conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  step1$b_HML
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.005541205  0.004607776
## sample estimates:
##     mean of x 
## -0.0004667146
# Relabel the per-date slopes as lambdas: b_MKT -> lam_MKT, and likewise for
# SMB and HML. Column positions are unchanged.
step1_lambdas <- step1 %>%
  rename_with(
    \(nm) sub("^b_", "lam_", nm),
    .cols = c(b_MKT, b_SMB, b_HML)
  )

# One histogram panel per factor showing the distribution of its per-date
# lambda, with a dashed reference line at zero.
step1_lambdas %>%
  pivot_longer(
    cols      = c(lam_MKT, lam_SMB, lam_HML),
    names_to  = "Factor",
    values_to = "Lambda"
  ) %>%
  mutate(
    # Translate column names into display labels via a lookup vector.
    Factor = unname(c(
      lam_MKT = "MKT (Market)",
      lam_SMB = "SMB (Size)",
      lam_HML = "HML (Value)"
    )[Factor])
  ) %>%
  ggplot(mapping = aes(x = Lambda, fill = Factor)) +
  geom_histogram(bins = 60, alpha = 0.75, colour = "white") +
  geom_vline(
    xintercept = 0,
    linetype   = "dashed",
    colour     = "black",
    linewidth  = 0.8
  ) +
  facet_wrap(vars(Factor), scales = "free") +
  # Colours keyed by label, listed in the alphabetical order of the labels.
  scale_fill_manual(
    values = c(
      "HML (Value)"  = "#2980b9",
      "MKT (Market)" = "#27ae60",
      "SMB (Size)"   = "#e74c3c"
    )
  ) +
  labs(
    x        = "Lambda (Risk Premium)",
    y        = "Frequency",
    title    = "Distribution of Daily Cross-Sectional Risk Premia",
    subtitle = "Dashed line marks zero; a distribution centred away from zero indicates a nonzero risk premium",
    caption  = "Source: Fama-French Three-Factor Model | Daily data, Jan 2011 – Dec 2015"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title      = element_text(face = "bold", size = 14),
    plot.subtitle   = element_text(colour = "grey40", size = 11),
    strip.text      = element_text(face = "bold"),
    legend.position = "none"
  )