library(broom)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the input panel. Per the str() output further down it has one row per
# symbol/date with columns symbol, date, ri, MKT, SMB and HML.
data <- read.csv("data.csv")

# First pass: fit one regression per symbol of ri on the three factors
# (MKT, SMB, HML) over all of that symbol's rows, and keep the three slope
# estimates as b_MKT, b_HML and b_SMB (the intercept is discarded).
step0 <- data %>%
  nest(data = c(date, ri, MKT, SMB, HML)) %>%
  mutate(
    estimates = map(data, function(symbol_rows) {
      tidy(lm(ri ~ MKT + SMB + HML, data = symbol_rows))
    })
  ) %>%
  unnest(estimates) %>%
  # id_cols keeps only `symbol` as the row key, so the nested data and the
  # other tidy() columns (std.error, statistic, p.value) are dropped here.
  pivot_wider(
    id_cols = symbol,
    names_from = term,
    values_from = estimate
  ) %>%
  select(symbol, b_MKT = MKT, b_HML = HML, b_SMB = SMB)

# Attach each symbol's slopes to every one of its rows in the original data,
# so each observation carries its own b_MKT / b_HML / b_SMB.
step0 <- left_join(data, step0, by = "symbol")
# Second pass: one cross-sectional regression per date.
#
# For every date, regress the returns of all symbols (ri) on the per-symbol
# slopes b_MKT, b_SMB and b_HML that step0 attached to each row. Each fitted
# model contributes one output row holding that date's intercept and the
# three slope estimates.
#
# Fix: this step used to start from `data` and regress ri on the raw factors
# MKT, SMB and HML. The rendered diagnostics showed 0 non-NA slopes across
# all dates, which is what lm() produces when the regressors do not vary
# within a group (here: the same factor value for every symbol on a given
# date), making them collinear with the intercept. The betas differ between
# symbols, so they are the regressors that can be estimated per date.
#
# Result: one row per date with columns date, (Intercept), b_MKT, b_HML,
# b_SMB -- the same names and order as before. ungroup() removes the leftover
# per-date grouping so later summaries run over all dates at once.
step1 <- step0 %>%
  group_by(date) %>%
  nest() %>%
  mutate(
    estimates = map(data, function(cross_section) {
      tidy(lm(ri ~ b_MKT + b_SMB + b_HML, data = cross_section))
    })
  ) %>%
  unnest(estimates) %>%
  ungroup() %>%
  select(date, term, estimate) %>%
  pivot_wider(
    names_from = term,
    values_from = estimate
  ) %>%
  select(date, `(Intercept)`, b_MKT, b_HML, b_SMB)
# Inspect the raw input: column names, types and a preview of the values.
str(data)
## 'data.frame': 7542 obs. of 6 variables:
## $ symbol: chr "AAPL" "AAPL" "AAPL" "AAPL" ...
## $ date : chr "4-Jan-11" "5-Jan-11" "6-Jan-11" "7-Jan-11" ...
## $ ri : num 0.005206 0.008146 -0.000808 0.007136 0.018657 ...
## $ MKT : num -0.00131 0.00499 -0.00213 -0.00185 -0.00138 ...
## $ SMB : num -0.0065 0.0018 0.0001 0.0022 0.0041 0.0016 0.0031 -0.0026 -0.001 0.0056 ...
## $ HML : num 0.0008 0.0013 -0.0025 -0.0006 0.0039 0.0036 0 -0.0044 -0.0073 0.0015 ...
# Number of rows in the per-date estimates table.
print(nrow(step1))
## [1] 1257
# Column names of the per-date estimates table.
print(colnames(step1))
## [1] "date" "(Intercept)" "b_MKT" "b_HML" "b_SMB"
# If the slope columns exist, print how many non-missing estimates each one
# holds (in the order b_MKT, b_SMB, b_HML); otherwise list the columns that
# are actually present.
if ("b_MKT" %in% colnames(step1)) {
  invisible(lapply(
    c("b_MKT", "b_SMB", "b_HML"),
    function(slope_col) print(sum(!is.na(step1[[slope_col]])))
  ))
} else {
  print("Column names in step1:")
  print(colnames(step1))
}
## [1] 0
## [1] 0
## [1] 0