library(broom)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data <- read.csv("data.csv")

step0 <- data %>%
  nest(data = c(date, ri, MKT, SMB, HML)) %>%
  mutate(estimates = map(
    data,
    ~tidy(lm(ri ~ MKT + SMB + HML, data = .x))
  )) %>%
  unnest(estimates) %>%
  select(symbol, estimate, term) %>%
  pivot_wider(names_from = term,
              values_from = estimate) %>%
  select(symbol,
         b_MKT = MKT,
         b_HML = HML,
         b_SMB = SMB)

step0 <- data %>%
  left_join(step0, by = "symbol")



step1 <- data %>%
  group_by(date) %>%  # Group by date
  nest() %>%  # Nest everything else
  mutate(estimates = map(
    data,
    ~tidy(lm(ri ~ MKT + SMB + HML, data = .x))
  )) %>%
  unnest(estimates) %>%
  select(date, term, estimate) %>%  # Now date is available
  pivot_wider(names_from = term,
              values_from = estimate) %>%
  select(date, `(Intercept)`, b_MKT = MKT, b_HML = HML, b_SMB = SMB)



str(data)
## 'data.frame':    7542 obs. of  6 variables:
##  $ symbol: chr  "AAPL" "AAPL" "AAPL" "AAPL" ...
##  $ date  : chr  "4-Jan-11" "5-Jan-11" "6-Jan-11" "7-Jan-11" ...
##  $ ri    : num  0.005206 0.008146 -0.000808 0.007136 0.018657 ...
##  $ MKT   : num  -0.00131 0.00499 -0.00213 -0.00185 -0.00138 ...
##  $ SMB   : num  -0.0065 0.0018 0.0001 0.0022 0.0041 0.0016 0.0031 -0.0026 -0.001 0.0056 ...
##  $ HML   : num  0.0008 0.0013 -0.0025 -0.0006 0.0039 0.0036 0 -0.0044 -0.0073 0.0015 ...
print(nrow(step1))
## [1] 1257
print(names(step1))
## [1] "date"        "(Intercept)" "b_MKT"       "b_HML"       "b_SMB"
if("b_MKT" %in% names(step1)) {
  print(sum(!is.na(step1$b_MKT)))
  print(sum(!is.na(step1$b_SMB)))
  print(sum(!is.na(step1$b_HML)))
} else {
  print("Column names in step1:")
  print(names(step1))
}
## [1] 0
## [1] 0
## [1] 0