This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
DJI = read.table("dow_jones_index.data")
summary(DJI)
## V1
## Length:751
## Class :character
## Mode :character
DJI2 <- read_excel("dowjones.xlsx")
summary(DJI2)
## quarter stock date
## Min. :1.00 Length:750 Min. :2011-01-07 00:00:00
## 1st Qu.:1.00 Class :character 1st Qu.:2011-02-18 00:00:00
## Median :2.00 Mode :character Median :2011-04-01 00:00:00
## Mean :1.52 Mean :2011-03-31 23:02:24
## 3rd Qu.:2.00 3rd Qu.:2011-05-13 00:00:00
## Max. :2.00 Max. :2011-06-24 00:00:00
##
## open high low close
## Min. : 10.59 Min. : 10.94 Min. : 10.40 Min. : 10.52
## 1st Qu.: 29.83 1st Qu.: 30.63 1st Qu.: 28.72 1st Qu.: 30.36
## Median : 45.97 Median : 46.88 Median : 44.80 Median : 45.93
## Mean : 53.65 Mean : 54.67 Mean : 52.64 Mean : 53.73
## 3rd Qu.: 72.72 3rd Qu.: 74.29 3rd Qu.: 71.04 3rd Qu.: 72.67
## Max. :172.11 Max. :173.54 Max. :167.82 Max. :170.58
##
## volume percent_change_price percent_change_volume_over_last_wk
## Min. :9.719e+06 Min. :-15.42290 Min. :-61.4332
## 1st Qu.:3.087e+07 1st Qu.: -1.28805 1st Qu.:-19.8043
## Median :5.306e+07 Median : 0.00000 Median : 0.5126
## Mean :1.175e+08 Mean : 0.05026 Mean : 5.5936
## 3rd Qu.:1.327e+08 3rd Qu.: 1.65089 3rd Qu.: 21.8006
## Max. :1.453e+09 Max. : 9.88223 Max. :327.4089
## NA's :30
## previous_weeks_volume next_weeks_open next_weeks_close
## Min. :9.719e+06 Min. : 10.52 Min. : 10.52
## 1st Qu.:3.068e+07 1st Qu.: 30.32 1st Qu.: 30.46
## Median :5.295e+07 Median : 46.02 Median : 46.12
## Mean :1.174e+08 Mean : 53.70 Mean : 53.89
## 3rd Qu.:1.333e+08 3rd Qu.: 72.72 3rd Qu.: 72.92
## Max. :1.453e+09 Max. :172.11 Max. :174.54
## NA's :30
## percent_change_next_weeks_price days_to_next_dividend
## Min. :-15.4229 Min. : 0.00
## 1st Qu.: -1.2221 1st Qu.: 24.00
## Median : 0.1012 Median : 47.00
## Mean : 0.2385 Mean : 52.53
## 3rd Qu.: 1.8456 3rd Qu.: 69.00
## Max. : 9.8822 Max. :336.00
##
## percent_return_next_dividend
## Min. :0.06557
## 1st Qu.:0.53455
## Median :0.68107
## Mean :0.69183
## 3rd Qu.:0.85429
## Max. :1.56421
##
anyNA(DJI2)
## [1] TRUE
na_count_DJI2 <- sum(is.na(DJI2))
print(na_count_DJI2)
## [1] 60
DJI2 %>%
summarise(across(everything(), ~sum(is.na(.x)))) %>%
pivot_longer(everything(), names_to = "variable", values_to = "na_count") %>%
filter(na_count > 0) %>%
arrange(desc(na_count))
## # A tibble: 2 × 2
## variable na_count
## <chr> <int>
## 1 percent_change_volume_over_last_wk 30
## 2 previous_weeks_volume 30
DJI3 <- DJI2
DJI3$percent_change_volume_over_last_wk[is.na(DJI2$percent_change_volume_over_last_wk)] <- mean(DJI3$percent_change_volume_over_last_wk, na.rm=TRUE)
DJI3$previous_weeks_volume[is.na(DJI2$previous_weeks_volume)] <- mean(DJI3$previous_weeks_volume, na.rm=TRUE)
na_count_DJI3 <- sum(is.na(DJI3))
print(na_count_DJI3)
## [1] 0
glm(percent_change_next_weeks_price ~ . , family = gaussian(), DJI3)
##
## Call: glm(formula = percent_change_next_weeks_price ~ ., family = gaussian(),
## data = DJI3)
##
## Coefficients:
## (Intercept) quarter
## 1.861e+01 1.578e-01
## stockAXP stockBA
## -3.945e+00 -8.260e+00
## stockBAC stockCAT
## -1.281e+00 -1.001e+01
## stockCSCO stockCVX
## -2.832e+00 -1.241e+01
## stockDD stockDIS
## -7.913e+00 -8.602e+00
## stockGE stockHD
## -5.166e+00 -5.645e+00
## stockHPQ stockIBM
## -3.039e+00 -1.672e+01
## stockINTC stockJNJ
## -6.573e+00 -1.015e+01
## stockJPM stockKO
## -5.824e+00 -8.836e+00
## stockKRFT stockMCD
## -6.848e+00 -1.069e+01
## stockMMM stockMRK
## -1.038e+01 -9.373e+00
## stockMSFT stockPFE
## -5.198e+00 -7.235e+00
## stockPG stockT
## -9.629e+00 -1.191e+01
## stockTRV stockUTX
## -7.613e+00 -9.256e+00
## stockVZ stockWMT
## -1.157e+01 -7.513e+00
## stockXOM date
## -9.238e+00 -1.726e-08
## open high
## 2.497e-02 -2.835e-02
## low close
## 7.937e-02 -5.720e-02
## volume percent_change_price
## 3.979e-09 1.463e-02
## percent_change_volume_over_last_wk previous_weeks_volume
## -1.152e-03 -7.917e-10
## next_weeks_open next_weeks_close
## -1.369e+00 1.447e+00
## days_to_next_dividend percent_return_next_dividend
## -1.361e-04 8.587e+00
##
## Degrees of Freedom: 749 Total (i.e. Null); 706 Residual
## Null Deviance: 5378
## Residual Deviance: 1485 AIC: 2731
str(DJI3)
## tibble [750 × 16] (S3: tbl_df/tbl/data.frame)
## $ quarter : num [1:750] 1 1 1 1 1 1 1 1 1 1 ...
## $ stock : chr [1:750] "AA" "AA" "AA" "AA" ...
## $ date : POSIXct[1:750], format: "2011-01-07" "2011-01-14" ...
## $ open : num [1:750] 15.8 16.7 16.2 15.9 16.2 ...
## $ high : num [1:750] 16.7 16.7 16.4 16.6 17.4 ...
## $ low : num [1:750] 15.8 15.6 15.6 15.8 16.2 ...
## $ close : num [1:750] 16.4 16 15.8 16.1 17.1 ...
## $ volume : num [1:750] 2.40e+08 2.43e+08 1.38e+08 1.51e+08 1.54e+08 ...
## $ percent_change_price : num [1:750] 3.79 -4.43 -2.47 1.64 5.93 ...
## $ percent_change_volume_over_last_wk: num [1:750] 5.59 1.38 -43.02 9.36 1.99 ...
## $ previous_weeks_volume : num [1:750] 1.17e+08 2.40e+08 2.43e+08 1.38e+08 1.51e+08 ...
## $ next_weeks_open : num [1:750] 16.7 16.2 15.9 16.2 17.3 ...
## $ next_weeks_close : num [1:750] 16 15.8 16.1 17.1 17.4 ...
## $ percent_change_next_weeks_price : num [1:750] -4.428 -2.471 1.638 5.933 0.231 ...
## $ days_to_next_dividend : num [1:750] 26 19 12 5 97 90 83 76 69 62 ...
## $ percent_return_next_dividend : num [1:750] 0.183 0.188 0.19 0.186 0.175 ...
DJI3$date <- as.Date(DJI3$date, format = "%Y-%m-%d")
DJI4 <- DJI3 |> arrange(date)
head(DJI4)
## # A tibble: 6 × 16
## quarter stock date open high low close volume percent_change_price
## <dbl> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 AA 2011-01-07 15.8 16.7 15.8 16.4 2.40e8 3.79
## 2 1 AXP 2011-01-07 43.3 45.6 43.1 44.4 4.51e7 2.45
## 3 1 BA 2011-01-07 66.2 70.1 66 69.4 3.63e7 4.88
## 4 1 BAC 2011-01-07 13.8 14.7 13.8 14.2 1.45e9 2.89
## 5 1 CAT 2011-01-07 94.4 94.8 92.3 93.7 2.41e7 -0.689
## 6 1 CSCO 2011-01-07 20.4 21 20.4 21.0 3.04e8 2.54
## # ℹ 7 more variables: percent_change_volume_over_last_wk <dbl>,
## # previous_weeks_volume <dbl>, next_weeks_open <dbl>, next_weeks_close <dbl>,
## # percent_change_next_weeks_price <dbl>, days_to_next_dividend <dbl>,
## # percent_return_next_dividend <dbl>
train_DJI5 <- DJI4 |>
filter(quarter == 1)
test_DJI5 <- DJI4 |>
filter(quarter ==2)
summary(train_DJI5)
## quarter stock date open
## Min. :1 Length:360 Min. :2011-01-07 Min. : 13.71
## 1st Qu.:1 Class :character 1st Qu.:2011-01-26 1st Qu.: 28.38
## Median :1 Mode :character Median :2011-02-14 Median : 45.81
## Mean :1 Mean :2011-02-14 Mean : 52.77
## 3rd Qu.:1 3rd Qu.:2011-03-05 3rd Qu.: 71.23
## Max. :1 Max. :2011-03-25 Max. :164.18
## high low close volume
## Min. : 14.22 Min. : 13.32 Min. : 13.34 Min. :1.071e+07
## 1st Qu.: 28.82 1st Qu.: 27.91 1st Qu.: 28.45 1st Qu.:3.438e+07
## Median : 46.84 Median : 44.68 Median : 45.80 Median :5.708e+07
## Mean : 53.82 Mean : 51.81 Mean : 52.98 Mean :1.254e+08
## 3rd Qu.: 72.84 3rd Qu.: 69.32 3rd Qu.: 71.65 3rd Qu.:1.398e+08
## Max. :167.72 Max. :163.18 Max. :164.84 Max. :1.453e+09
## percent_change_price percent_change_volume_over_last_wk previous_weeks_volume
## Min. :-15.4229 Min. :-60.988 Min. :1.071e+07
## 1st Qu.: -1.0501 1st Qu.:-23.613 1st Qu.:3.594e+07
## Median : 0.2789 Median : 2.142 Median :6.526e+07
## Mean : 0.2623 Mean : 4.638 Mean :1.265e+08
## 3rd Qu.: 1.8813 3rd Qu.: 21.409 3rd Qu.:1.313e+08
## Max. : 7.6217 Max. :327.409 Max. :1.453e+09
## next_weeks_open next_weeks_close percent_change_next_weeks_price
## Min. : 13.42 Min. : 13.34 Min. :-15.4229
## 1st Qu.: 28.38 1st Qu.: 28.41 1st Qu.: -0.9999
## Median : 46.02 Median : 45.93 Median : 0.3704
## Mean : 52.99 Mean : 53.23 Mean : 0.2871
## 3rd Qu.: 71.45 3rd Qu.: 71.71 3rd Qu.: 1.8305
## Max. :164.18 Max. :164.84 Max. : 7.6217
## days_to_next_dividend percent_return_next_dividend
## Min. : 0.00 Min. :0.06557
## 1st Qu.: 23.75 1st Qu.:0.52827
## Median : 47.00 Median :0.67720
## Mean : 53.76 Mean :0.69453
## 3rd Qu.: 69.00 3rd Qu.:0.86207
## Max. :336.00 Max. :1.56421
summary(test_DJI5)
## quarter stock date open
## Min. :2 Length:390 Min. :2011-04-01 Min. : 10.59
## 1st Qu.:2 Class :character 1st Qu.:2011-04-21 1st Qu.: 30.74
## Median :2 Mode :character Median :2011-05-13 Median : 46.02
## Mean :2 Mean :2011-05-12 Mean : 54.46
## 3rd Qu.:2 3rd Qu.:2011-06-03 3rd Qu.: 74.56
## Max. :2 Max. :2011-06-24 Max. :172.11
## high low close volume
## Min. : 10.94 Min. : 10.40 Min. : 10.52 Min. : 9718851
## 1st Qu.: 31.31 1st Qu.: 30.20 1st Qu.: 30.73 1st Qu.: 28794263
## Median : 46.91 Median : 45.19 Median : 46.27 Median : 49836700
## Mean : 55.46 Mean : 53.40 Mean : 54.43 Mean :110307075
## 3rd Qu.: 75.49 3rd Qu.: 72.95 3rd Qu.: 74.67 3rd Qu.:117721728
## Max. :173.54 Max. :167.82 Max. :170.58 Max. :889460755
## percent_change_price percent_change_volume_over_last_wk previous_weeks_volume
## Min. :-10.4975 Min. :-61.433 Min. : 9718851
## 1st Qu.: -1.5454 1st Qu.:-14.278 1st Qu.: 28324494
## Median : -0.1317 Median : 1.475 Median : 48269156
## Mean : -0.1455 Mean : 6.475 Mean :109000266
## 3rd Qu.: 1.4685 3rd Qu.: 19.442 3rd Qu.:117095779
## Max. : 9.8822 Max. :249.984 Max. :889460755
## next_weeks_open next_weeks_close percent_change_next_weeks_price
## Min. : 10.52 Min. : 10.52 Min. :-10.49750
## 1st Qu.: 30.74 1st Qu.: 30.86 1st Qu.: -1.45328
## Median : 46.03 Median : 46.27 Median : -0.06029
## Mean : 54.36 Mean : 54.49 Mean : 0.19355
## 3rd Qu.: 74.56 3rd Qu.: 74.70 3rd Qu.: 1.88444
## Max. :172.11 Max. :174.54 Max. : 9.88223
## days_to_next_dividend percent_return_next_dividend
## Min. : 0.00 Min. :0.07418
## 1st Qu.: 26.00 1st Qu.:0.53839
## Median : 47.00 Median :0.68614
## Mean : 51.38 Mean :0.68933
## 3rd Qu.: 69.00 3rd Qu.:0.85081
## Max. :252.00 Max. :1.41727
df <- train_DJI5
# Example: df has columns Stock and Value
stocks <- unique(train_DJI5$stock)
for (stock in stocks) {
stock_plot <- ggplot(df[train_DJI5$stock == stock, ], aes(x = stock, y = percent_change_price)) +
geom_boxplot(fill = "skyblue") +
geom_jitter(width = 0.15, size = 1.5, alpha = 0.7, color = "darkblue") +
labs(title = paste("Q1_ Boxplot for", stock),
x = "Stock",
y = "% Return") +
theme_minimal()
ggsave(filename = paste0("Q1_Boxplot_", stock, ".png"), plot = stock_plot, width = 6, height = 4)
}
stocks <- unique(train_DJI5$stock)
lm_stocks <- data.frame(
Stock = character(),
Beta = numeric(),
MSE = numeric(),
R2 = numeric(),
stringsAsFactors = FALSE
)
for (s in stocks) {
train_s <- train_DJI5 %>% filter(stock == s)
test_s <- test_DJI5 %>% filter(stock == s)
if (nrow(train_s) < 2 || nrow(test_s) == 0) next
drop_cols <- intersect(c("stock", "Stock", "quarter", "Quarter"), names(train_s))
train_s <- dplyr::select(train_s, -all_of(drop_cols))
test_s <- dplyr::select(test_s, -all_of(drop_cols))
train_s <- dplyr::select(train_s, where(is.numeric))
test_s <- dplyr::select(test_s, where(is.numeric))
keep_cols <- sapply(train_s, function(x) length(unique(x)) > 1)
train_s <- train_s[, keep_cols, drop = FALSE]
test_s <- test_s[, keep_cols, drop = FALSE]
if (!"percent_change_price" %in% names(train_s)) next
preds <- setdiff(names(train_s), "percent_change_price")
if (length(preds) == 0) next
formula_s <- as.formula(
paste("percent_change_price ~", paste(preds, collapse = " + "))
)
fit_s <- lm(formula_s, data = train_s)
pred_vals <- predict(fit_s, newdata = test_s)
mse_s <- mean((pred_vals - test_s$percent_change_price)^2, na.rm = TRUE)
sse <- sum((pred_vals - test_s$percent_change_price)^2, na.rm = TRUE)
sst <- sum((test_s$percent_change_price - mean(test_s$percent_change_price, na.rm = TRUE))^2, na.rm = TRUE)
r2_test <- ifelse(sst == 0, NA_real_, 1 - sse/sst)
beta_s <- coef(fit_s)[2]
rmse <- sqrt(mean((pred_vals - test_s$percent_change_price)^2, na.rm = TRUE))
lm_stocks <- rbind(lm_stocks,
data.frame(Stock = s, Beta = beta_s, MSE = mse_s, R2 = r2_test, RMSE = rmse))
# Create data for plot
plot_df <- data.frame(
obs_id = seq_len(nrow(test_s)),
Actual = test_s$percent_change_price,
Pred = pred_vals
)
p <- ggplot(plot_df, aes(x = obs_id)) +
geom_line(aes(y = Actual, color = "Actual"), linewidth = 1) +
geom_line(aes(y = Pred, color = "Predicted"), linewidth = 1, linetype = "dashed") +
labs(
title = paste("Actual vs Predicted for", s),
x = "Q2 Week # (Test Set)",
y = "percent_change_price",
color = ""
) +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
# Save the plot as a PNG
filename <- paste0("linear_plot_", s, ".png")
ggsave(filename = filename, plot = p, width = 6, height = 4, dpi = 300)
message("Saved plot for ", s, " → ", filename)
}
## Saved plot for AA → linear_plot_AA.png
## Saved plot for AXP → linear_plot_AXP.png
## Saved plot for BA → linear_plot_BA.png
## Saved plot for BAC → linear_plot_BAC.png
## Saved plot for CAT → linear_plot_CAT.png
## Saved plot for CSCO → linear_plot_CSCO.png
## Saved plot for CVX → linear_plot_CVX.png
## Saved plot for DD → linear_plot_DD.png
## Saved plot for DIS → linear_plot_DIS.png
## Saved plot for GE → linear_plot_GE.png
## Saved plot for HD → linear_plot_HD.png
## Saved plot for HPQ → linear_plot_HPQ.png
## Saved plot for IBM → linear_plot_IBM.png
## Saved plot for INTC → linear_plot_INTC.png
## Saved plot for JNJ → linear_plot_JNJ.png
## Saved plot for JPM → linear_plot_JPM.png
## Saved plot for KRFT → linear_plot_KRFT.png
## Saved plot for KO → linear_plot_KO.png
## Saved plot for MCD → linear_plot_MCD.png
## Saved plot for MMM → linear_plot_MMM.png
## Saved plot for MRK → linear_plot_MRK.png
## Saved plot for MSFT → linear_plot_MSFT.png
## Saved plot for PFE → linear_plot_PFE.png
## Saved plot for PG → linear_plot_PG.png
## Saved plot for T → linear_plot_T.png
## Saved plot for TRV → linear_plot_TRV.png
## Saved plot for UTX → linear_plot_UTX.png
## Saved plot for VZ → linear_plot_VZ.png
## Saved plot for WMT → linear_plot_WMT.png
## Saved plot for XOM → linear_plot_XOM.png
# View summary table
lm_stocks
## Stock Beta MSE R2 RMSE
## open AA -6.2029824 1.561763e-01 0.9892393 0.39519150
## open1 AXP -2.1372260 1.106554e-01 0.9782802 0.33264906
## open2 BA -1.4498696 2.668131e-01 0.9614604 0.51653953
## open3 BAC -5.2427173 5.850675e+00 -0.4636001 2.41881689
## open4 CAT -2.1891859 1.911225e+02 -13.0480830 13.82470471
## open5 CSCO -7.6664370 2.941887e+02 -39.6192764 17.15192858
## open6 CVX -1.7515942 4.721032e+00 0.2438875 2.17279351
## open7 DD -1.5879892 1.206705e+01 -0.5690389 3.47376585
## open8 DIS -2.4743154 2.151446e+00 0.6008815 1.46678074
## open9 GE -4.5246677 1.928530e-01 0.9513643 0.43915028
## open10 HD -2.3118176 1.804019e+00 0.5407556 1.34313787
## open11 HPQ -4.2788385 2.067081e+01 -0.8780404 4.54651643
## open12 IBM -0.6310905 9.244657e-03 0.9947966 0.09614914
## open13 INTC -4.5505550 9.368176e-01 0.9412649 0.96789337
## open14 JNJ -1.7631830 2.354709e-02 0.9948454 0.15345060
## open15 JPM -2.1086835 5.335163e-02 0.9882694 0.23097971
## open16 KRFT -3.0755085 4.276757e-01 0.8618524 0.65396920
## open17 KO -1.5219102 8.496587e-03 0.9956161 0.09217693
## open18 MCD -1.3392114 4.763068e-02 0.9691705 0.21824453
## open19 MMM -1.1137690 1.048478e-02 0.9968134 0.10239519
## open20 MRK -2.8511429 2.083228e-01 0.9584148 0.45642389
## open21 MSFT -3.0594096 2.885462e+00 0.3804582 1.69866481
## open22 PFE -5.4821097 2.352371e-01 0.9511855 0.48501250
## open23 PG -1.7052305 3.661273e-01 0.8927803 0.60508455
## open24 T -4.5303538 4.577664e+01 -12.4959012 6.76584396
## open25 TRV -2.2396539 7.363370e-01 0.7847406 0.85810079
## open26 UTX -1.2347414 1.804701e-02 0.9966683 0.13433916
## open27 VZ -2.8342122 2.712158e-02 0.9917136 0.16468632
## open28 WMT -1.9022279 2.550235e-02 0.9859318 0.15969456
## open29 XOM -1.2299851 2.847315e-02 0.9962196 0.16873990
lm_stocks |> arrange(desc(R2))
## Stock Beta MSE R2 RMSE
## open19 MMM -1.1137690 1.048478e-02 0.9968134 0.10239519
## open26 UTX -1.2347414 1.804701e-02 0.9966683 0.13433916
## open29 XOM -1.2299851 2.847315e-02 0.9962196 0.16873990
## open17 KO -1.5219102 8.496587e-03 0.9956161 0.09217693
## open14 JNJ -1.7631830 2.354709e-02 0.9948454 0.15345060
## open12 IBM -0.6310905 9.244657e-03 0.9947966 0.09614914
## open27 VZ -2.8342122 2.712158e-02 0.9917136 0.16468632
## open AA -6.2029824 1.561763e-01 0.9892393 0.39519150
## open15 JPM -2.1086835 5.335163e-02 0.9882694 0.23097971
## open28 WMT -1.9022279 2.550235e-02 0.9859318 0.15969456
## open1 AXP -2.1372260 1.106554e-01 0.9782802 0.33264906
## open18 MCD -1.3392114 4.763068e-02 0.9691705 0.21824453
## open2 BA -1.4498696 2.668131e-01 0.9614604 0.51653953
## open20 MRK -2.8511429 2.083228e-01 0.9584148 0.45642389
## open9 GE -4.5246677 1.928530e-01 0.9513643 0.43915028
## open22 PFE -5.4821097 2.352371e-01 0.9511855 0.48501250
## open13 INTC -4.5505550 9.368176e-01 0.9412649 0.96789337
## open23 PG -1.7052305 3.661273e-01 0.8927803 0.60508455
## open16 KRFT -3.0755085 4.276757e-01 0.8618524 0.65396920
## open25 TRV -2.2396539 7.363370e-01 0.7847406 0.85810079
## open8 DIS -2.4743154 2.151446e+00 0.6008815 1.46678074
## open10 HD -2.3118176 1.804019e+00 0.5407556 1.34313787
## open21 MSFT -3.0594096 2.885462e+00 0.3804582 1.69866481
## open6 CVX -1.7515942 4.721032e+00 0.2438875 2.17279351
## open3 BAC -5.2427173 5.850675e+00 -0.4636001 2.41881689
## open7 DD -1.5879892 1.206705e+01 -0.5690389 3.47376585
## open11 HPQ -4.2788385 2.067081e+01 -0.8780404 4.54651643
## open24 T -4.5303538 4.577664e+01 -12.4959012 6.76584396
## open4 CAT -2.1891859 1.911225e+02 -13.0480830 13.82470471
## open5 CSCO -7.6664370 2.941887e+02 -39.6192764 17.15192858
# 1) read files
dowjones <- read_excel("dowjones.xlsx")
sp500 <- read_excel("sp500.xlsx")
# 2) make sure date is Date
dowjones$date <- as.Date(dowjones$date)
sp500$date <- as.Date(sp500$date)
# 3) rename S&P return so we can tell them apart
sp500 <- sp500 %>%
dplyr::rename(sp500_return = percent_change_price) %>%
dplyr::select(date, sp500_return)
# 4) merge the two
df <- dowjones %>%
dplyr::select(date, stock, percent_change_price) %>%
dplyr::inner_join(sp500, by = "date") %>%
dplyr::rename(
stock_return = percent_change_price,
market_return = sp500_return
)
print(df)
## # A tibble: 720 × 4
## date stock stock_return market_return
## <date> <chr> <dbl> <dbl>
## 1 2011-01-07 AA 3.79 0.0171
## 2 2011-01-14 AA -4.43 -0.0077
## 3 2011-01-21 AA -2.47 -0.0055
## 4 2011-01-28 AA 1.64 0.0271
## 5 2011-02-04 AA 5.93 0.014
## 6 2011-02-11 AA 0.231 0.0104
## 7 2011-02-18 AA -0.633 -0.0172
## 8 2011-02-25 AA -1.77 0.001
## 9 2011-03-04 AA -1.37 -0.0128
## 10 2011-03-11 AA -3.32 -0.0192
## # ℹ 710 more rows
# 5) run CAPM per stock
stocks <- unique(df$stock)
capm_results <- data.frame(
Stock = character(),
Alpha = numeric(),
Beta = numeric(),
R2 = numeric(),
RMSE = numeric(),
stringsAsFactors = FALSE
)
for (s in stocks) {
df_s <- df %>% dplyr::filter(stock == s)
# skip tiny samples
if (nrow(df_s) < 10) next
capm_fit <- lm(stock_return ~ market_return, data = df_s)
summ <- summary(capm_fit)
alpha_s <- coef(capm_fit)[1]
beta_s <- coef(capm_fit)[2]
r2_s <- summ$r.squared
rmse_s <- sqrt(mean((predict(capm_fit, df_s) - df_s$stock_return)^2, na.rm = TRUE))
capm_results <- rbind(
capm_results,
data.frame(
Stock = s,
Alpha = alpha_s,
Beta = beta_s,
R2 = r2_s,
RMSE = rmse_s
)
)
# Security Characteristic Line
p <- ggplot(df_s, aes(x = market_return, y = stock_return)) +
geom_point(color = "gray70") +
geom_smooth(method = "lm", se = FALSE, color = "steelblue", linewidth = 1) +
labs(
title = paste("CAPM: SCL for", s),
subtitle = paste0("α = ", round(alpha_s, 4), " | β = ", round(beta_s, 3)),
x = "S&P 500 return",
y = paste(s, "return")
) +
theme_minimal()
ggsave(paste0("CAPM_", s, ".png"), p, width = 6, height = 4, dpi = 300)
message("Saved CAPM plot for ", s)
}
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for AA
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for AXP
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for BA
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for BAC
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for CAT
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for CSCO
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for CVX
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for DD
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for DIS
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for GE
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for HD
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for HPQ
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for IBM
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for INTC
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for JNJ
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for JPM
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for KRFT
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for KO
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for MCD
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for MMM
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for MRK
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for MSFT
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for PFE
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for PG
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for T
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for TRV
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for UTX
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for VZ
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for WMT
##
## `geom_smooth()` using formula = 'y ~ x'
## Saved CAPM plot for XOM
# ---- Step 6: View CAPM summary table ----
capm_results
## Stock Alpha Beta R2 RMSE
## (Intercept) AA -0.577924574 70.94495302 1.355143e-01 3.244169
## (Intercept)1 AXP 0.436013313 -5.11494553 1.232143e-03 2.636565
## (Intercept)2 BA 0.153085705 -29.55464460 4.543791e-02 2.452530
## (Intercept)3 BAC -0.896512831 -18.73512480 1.117968e-02 3.190040
## (Intercept)4 CAT -0.075268128 54.40775542 9.608016e-02 3.021374
## (Intercept)5 CSCO -1.227101304 -9.92968528 2.085422e-03 3.932610
## (Intercept)6 CVX 0.222200900 3.79943564 7.495573e-04 2.511594
## (Intercept)7 DD 0.153896184 64.79037235 1.912173e-01 2.412451
## (Intercept)8 DIS 0.018484568 20.91494179 1.832884e-02 2.771199
## (Intercept)9 GE -0.282185446 -4.10406279 1.220999e-03 2.125136
## (Intercept)10 HD 0.077660413 15.92576114 1.385907e-02 2.432194
## (Intercept)11 HPQ -0.643643163 29.20879736 1.885881e-02 3.814327
## (Intercept)12 IBM 0.502013428 11.85758169 1.656144e-02 1.654308
## (Intercept)13 INTC -0.215081164 -0.06793618 2.235314e-07 2.601521
## (Intercept)14 JNJ 0.085634538 -35.12053153 1.448399e-01 1.545028
## (Intercept)15 JPM -0.072795784 -25.52512839 3.867987e-02 2.303855
## (Intercept)16 KRFT 0.486315866 -2.18328176 4.667386e-04 1.829225
## (Intercept)17 KO 0.081198873 -10.38387254 1.141419e-02 1.749604
## (Intercept)18 MCD 0.242231747 -25.16491045 6.533737e-02 1.723208
## (Intercept)19 MMM 0.155249012 -19.28258341 3.218299e-02 1.914451
## (Intercept)20 MRK 0.031065889 -21.69551601 2.156415e-02 2.645852
## (Intercept)21 MSFT -0.564588438 4.79376755 2.192071e-03 1.851692
## (Intercept)22 PFE 0.729224799 -2.76701007 4.528491e-04 2.353592
## (Intercept)23 PG 0.046712519 -25.15717143 7.805838e-02 1.565306
## (Intercept)24 T 0.065350379 -25.97413122 6.595381e-02 1.769705
## (Intercept)25 TRV 0.003905631 -19.11335203 2.803254e-02 2.037638
## (Intercept)26 UTX 0.063147221 19.37766308 4.243202e-02 1.666614
## (Intercept)27 VZ -0.112558507 39.15827876 1.881299e-01 1.472766
## (Intercept)28 WMT 0.050189252 -4.12780933 2.448794e-03 1.508366
## (Intercept)29 XOM 0.106082268 -12.26801203 7.453260e-03 2.563139
beta_vector <- setNames(capm_results$Beta, capm_results$Stock)
beta_vector <- sort(beta_vector, decreasing = TRUE)
beta_vector
## AA DD CAT VZ HPQ DIS
## 70.94495302 64.79037235 54.40775542 39.15827876 29.20879736 20.91494179
## UTX HD IBM MSFT CVX INTC
## 19.37766308 15.92576114 11.85758169 4.79376755 3.79943564 -0.06793618
## KRFT PFE GE WMT AXP CSCO
## -2.18328176 -2.76701007 -4.10406279 -4.12780933 -5.11494553 -9.92968528
## KO XOM BAC TRV MMM MRK
## -10.38387254 -12.26801203 -18.73512480 -19.11335203 -19.28258341 -21.69551601
## PG MCD JPM T BA JNJ
## -25.15717143 -25.16491045 -25.52512839 -25.97413122 -29.55464460 -35.12053153
risky_stocks <- capm_results %>%
dplyr::filter(Beta > 1) %>%
dplyr::select(Stock, Beta)
risky_Bstocks <- setNames(risky_stocks$Beta, risky_stocks$Stock)
risky_Bstocks
## AA CAT CVX DD DIS HD HPQ IBM
## 70.944953 54.407755 3.799436 64.790372 20.914942 15.925761 29.208797 11.857582
## MSFT UTX VZ
## 4.793768 19.377663 39.158279