library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom)
df <- read_csv("data.csv")
## Rows: 7542 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): symbol, date
## dbl (4): ri, MKT, SMB, HML
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(df)
## Rows: 7,542
## Columns: 6
## $ symbol <chr> "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL", "AAPL",…
## $ date <chr> "4-Jan-11", "5-Jan-11", "6-Jan-11", "7-Jan-11", "10-Jan-11", "1…
## $ ri <dbl> 0.0052062641, 0.0081462879, -0.0008082435, 0.0071360567, 0.0186…
## $ MKT <dbl> -0.0013138901, 0.0049946699, -0.0021252276, -0.0018465050, -0.0…
## $ SMB <dbl> -0.0065, 0.0018, 0.0001, 0.0022, 0.0041, 0.0016, 0.0031, -0.002…
## $ HML <dbl> 0.0008, 0.0013, -0.0025, -0.0006, 0.0039, 0.0036, 0.0000, -0.00…
# Step 0: Estimate factor loadings (betas) for each stock
step0 <- df %>%
group_by(symbol) %>%
nest() %>%
mutate(estimates = map(data, ~ tidy(lm(ri ~ MKT + SMB + HML, data = .)))) %>%
unnest(estimates) %>%
select(symbol, term, estimate) %>%
pivot_wider(names_from = term, values_from = estimate) %>%
rename(beta_mkt = MKT, beta_smb = SMB, beta_hml = HML) %>%
select(symbol, beta_mkt, beta_smb, beta_hml)
# View Step 0 result
print(step0)
## # A tibble: 6 × 4
## # Groups: symbol [6]
## symbol beta_mkt beta_smb beta_hml
## <chr> <dbl> <dbl> <dbl>
## 1 AAPL 0.900 0.0685 -0.0578
## 2 FORD 0.513 -0.264 0.138
## 3 GE 1.08 0.0994 0.0902
## 4 GM 1.29 0.00390 -0.0222
## 5 IBM 0.817 0.0336 -0.0121
## 6 MSFT 0.966 0.0582 -0.0641
# Join betas back into original data by stock symbol
df1 <- left_join(df, step0, by = "symbol")
# Step 1: For each date, regress returns on the factor loadings (betas)
step1 <- df1 %>%
group_by(date) %>%
nest() %>%
mutate(regression = map(data, ~ tidy(lm(ri ~ beta_mkt + beta_smb + beta_hml, data = .)))) %>%
unnest(regression) %>%
select(date, term, estimate) %>%
pivot_wider(names_from = term, values_from = estimate)
# View Step 1 result
print(step1)
## # A tibble: 1,257 × 5
## # Groups: date [1,257]
## date `(Intercept)` beta_mkt beta_smb beta_hml
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 4-Jan-11 -0.0298 0.0416 -0.0255 0.0574
## 2 5-Jan-11 0.0223 -0.0113 -0.158 0.0628
## 3 6-Jan-11 -0.0292 0.0373 0.00703 -0.173
## 4 7-Jan-11 -0.0175 0.0127 0.0323 -0.0642
## 5 10-Jan-11 0.0364 -0.0366 0.0171 0.0586
## 6 11-Jan-11 0.00274 0.00409 -0.0954 0.0899
## 7 12-Jan-11 0.0723 -0.0554 -0.164 0.0430
## 8 13-Jan-11 0.0150 -0.0194 0.00181 0.0256
## 9 14-Jan-11 0.0198 -0.0165 0.0633 0.0392
## 10 18-Jan-11 -0.0189 0.0101 0.0525 -0.0900
## # ℹ 1,247 more rows
# Step One: Cross-sectional Regression (per time period)
df1 <- left_join(df, step0, by = "symbol")
step1 <- df1 %>%
group_by(date) %>%
nest() %>%
mutate(regression = map(data, ~ tidy(lm(ri ~ beta_mkt + beta_smb + beta_hml, data = .)))) %>%
unnest(regression)
# Ensure Step 1's output is correct and exists
step1_wide <- step1 %>%
pivot_wider(names_from = term, values_from = estimate)
# Check if step1_wide exists and look at its structure
print(head(step1_wide))
## # A tibble: 6 × 9
## # Groups: date [2]
## date data std.error statistic p.value `(Intercept)` beta_mkt beta_smb
## <chr> <list> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4-Jan-11 <tibble> 0.00542 -5.49 0.0316 -0.0298 NA NA
## 2 4-Jan-11 <tibble> 0.00574 7.25 0.0185 NA 0.0416 NA
## 3 4-Jan-11 <tibble> 0.0129 -1.98 0.186 NA NA -0.0255
## 4 4-Jan-11 <tibble> 0.0166 3.45 0.0746 NA NA NA
## 5 5-Jan-11 <tibble> 0.0252 0.887 0.469 0.0223 NA NA
## 6 5-Jan-11 <tibble> 0.0267 -0.425 0.712 NA -0.0113 NA
## # ℹ 1 more variable: beta_hml <dbl>
step2 <- step1_wide %>%
summarise(
mean_beta_mkt = mean(beta_mkt, na.rm = TRUE),
mean_beta_smb = mean(beta_smb, na.rm = TRUE),
mean_beta_hml = mean(beta_hml, na.rm = TRUE)
)
# View the time series average of the coefficients
print(step2)
## # A tibble: 1,257 × 4
## date mean_beta_mkt mean_beta_smb mean_beta_hml
## <chr> <dbl> <dbl> <dbl>
## 1 1-Apr-11 0.0935 -0.101 0.0335
## 2 1-Apr-13 0.0257 -0.0385 0.0621
## 3 1-Apr-14 -0.00167 0.202 -0.132
## 4 1-Apr-15 -0.0412 0.00570 0.0649
## 5 1-Aug-11 -0.0111 -0.307 0.104
## 6 1-Aug-12 0.0174 0.0990 -0.0267
## 7 1-Aug-13 0.0271 -0.00876 0.0284
## 8 1-Aug-14 0.00726 0.201 0.0110
## 9 1-Dec-11 -0.0511 0.0512 0.0284
## 10 1-Dec-14 -0.0226 -0.142 -0.0117
## # ℹ 1,247 more rows
# Optional: Run t-tests for statistical significance
t_test_mkt <- t.test(step1_wide$beta_mkt, mu = 0)
t_test_smb <- t.test(step1_wide$beta_smb, mu = 0)
t_test_hml <- t.test(step1_wide$beta_hml, mu = 0)
# Print the results of the t-tests
print(t_test_mkt)
##
## One Sample t-test
##
## data: step1_wide$beta_mkt
## t = -0.37879, df = 1256, p-value = 0.7049
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.002546371 0.001722208
## sample estimates:
## mean of x
## -0.0004120813
print(t_test_smb)
##
## One Sample t-test
##
## data: step1_wide$beta_smb
## t = 0.97712, df = 1256, p-value = 0.3287
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003711466 0.011076953
## sample estimates:
## mean of x
## 0.003682744
print(t_test_hml)
##
## One Sample t-test
##
## data: step1_wide$beta_hml
## t = -0.18044, df = 1256, p-value = 0.8568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.005541205 0.004607776
## sample estimates:
## mean of x
## -0.0004667146