# One-time setup: make sure every package used by this script is installed.
# Only packages that cannot be found are installed, so re-running the script
# does not re-download packages that are already present.
required_pkgs <- c(
  "tidymodels", "ISLR", "ISLR2", "discrim", "poissonreg", "corrr", "paletteer"
)
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

# Attach the packages used below. Each call sits on its own line so that the
# trailing comments do not comment out the library() calls that follow them.
library(tidymodels)
library(ISLR) # For the Smarket data set
library(ISLR2) # For the Bikeshare data set
library(discrim)
library(poissonreg)
library(corrr)

# Correlation plot ----

# Correlations between all columns of Smarket except Direction, which is
# dropped before calling correlate().
cor_Smarket <- Smarket %>%
  select(-Direction) %>%
  correlate()

# Plot the correlations using a three-colour scale.
rplot(cor_Smarket, colours = c("indianred2", "black", "skyblue1"))

# Heatmap ----

# paletteer supplies scale_fill_paletteer_c(), used for the fill scale below.
library(paletteer)

# Draw the correlations as a tile heatmap: stretch() reshapes cor_Smarket so
# that x, y and r can be mapped to the axes and the fill, and each tile is
# labelled with the fashion()-formatted value of r.
cor_Smarket %>%
  stretch() %>%
  ggplot(aes(x, y, fill = r)) +
  geom_tile() +
  geom_text(aes(label = as.character(fashion(r)))) +
  scale_fill_paletteer_c("scico::roma", limits = c(-1, 1), direction = -1)

# Volume over time ----

# Volume against Year. height = 0 means points are jittered horizontally
# only, so the plotted Volume values are not perturbed.
ggplot(data = Smarket, mapping = aes(x = Year, y = Volume)) +
  geom_jitter(height = 0)

# Logistic regression spec ----

# Model specification: logistic regression fitted with the "glm" engine in
# classification mode. The specification is reused by every fit below.
lr_spec <- logistic_reg() %>%
  set_engine("glm") %>%
  set_mode("classification")

# Fit on full data ----

# Fit the specification to the complete Smarket data, modelling Direction
# from the five lag variables and Volume.
lr_fit <- lr_spec %>%
  fit(
    Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
    data = Smarket
  )

# Inspect the fitted model in three ways.

# 1. Print the fitted model object itself.
lr_fit

# 2. Extract the "fit" element of the object and summarise it.
lr_fit %>%
  pluck("fit") %>%
  summary()

# 3. Tidy summary of the fit.
tidy(lr_fit)

# Predictions for the same data the model was fitted on, using the default
# prediction type.
predict(lr_fit, new_data = Smarket)

# The same predictions requested as probabilities (type = "prob").
predict(lr_fit, new_data = Smarket, type = "prob")

# Evaluate lr_fit on the data it was fitted on. augment() attaches the
# predictions to Smarket; .pred_class is compared against Direction.

# Confusion matrix.
augment(lr_fit, new_data = Smarket) %>%
  conf_mat(truth = Direction, estimate = .pred_class)

# The same confusion matrix drawn as a heatmap.
augment(lr_fit, new_data = Smarket) %>%
  conf_mat(truth = Direction, estimate = .pred_class) %>%
  autoplot(type = "heatmap")

# Accuracy.
augment(lr_fit, new_data = Smarket) %>%
  accuracy(truth = Direction, estimate = .pred_class)

# Train/test split by year ----

# Split by year: rows from 2005 form the test set, every other row is used
# for training.
Smarket_train <- Smarket %>%
  filter(Year != 2005)

Smarket_test <- Smarket %>%
  filter(Year == 2005)

# Fit on training data (all predictors) ----

# Same formula as the full-data fit, but estimated on the training rows only.
lr_fit2 <- lr_spec %>%
  fit(
    Direction ~ Lag1 + Lag2 + Lag3 + Lag4 + Lag5 + Volume,
    data = Smarket_train
  )

# Evaluate lr_fit2 on the held-out test rows.

# Confusion matrix of predicted class against Direction.
augment(lr_fit2, new_data = Smarket_test) %>%
  conf_mat(truth = Direction, estimate = .pred_class)

# Accuracy on the test rows.
augment(lr_fit2, new_data = Smarket_test) %>%
  accuracy(truth = Direction, estimate = .pred_class)

# Fit on training data (Lag1 + Lag2 only) ----

# Reduced model: only Lag1 and Lag2 as predictors, fitted on the training
# rows.
lr_fit3 <- lr_spec %>%
  fit(
    Direction ~ Lag1 + Lag2,
    data = Smarket_train
  )

# Evaluate lr_fit3 on the held-out test rows.

# Confusion matrix of predicted class against Direction.
augment(lr_fit3, new_data = Smarket_test) %>%
  conf_mat(truth = Direction, estimate = .pred_class)

# Accuracy on the test rows.
augment(lr_fit3, new_data = Smarket_test) %>%
  accuracy(truth = Direction, estimate = .pred_class)

# Predict on new data ----

# Two new observations, supplying values for Lag1 and Lag2 only.
Smarket_new <- tibble(
  Lag1 = c(1.2, 1.5),
  Lag2 = c(1.1, -0.8)
)

# Probabilities (type = "prob") from lr_fit3 for the rows of Smarket_new.
predict(
  lr_fit3,
  new_data = Smarket_new,
  type = "prob"
)