# Helper packages
library(tidyverse)   # for data wrangling & plotting

# Modeling packages
library(tidymodels)  # for modeling, resampling & tuning (loads rsample, recipes, parsnip, tune, ...)
library(earth)       # engine used to fit MARS models

# Model interpretability packages
library(vip)         # for variable importance
library(pdp)         # for variable relationships

boston <- read_csv("data/boston.csv")
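
Assuming data/boston.csv mirrors the corrected Boston housing data (with cmedv, the corrected median home value, as the outcome), a quick structural check is cheap insurance before modeling:

# Optional: confirm the outcome column (cmedv) and predictor types look right
glimpse(boston)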
  1. Split the data into a training set and test set using a 70/30 split. Be sure to include set.seed(123) so that your train and test sets match mine.
set.seed(123)
split <- initial_split(boston, prop = .7, strata = cmedv)
boston_train <- training(split)
boston_test <- testing(split)
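
A quick optional check that the proportions landed where we expect (strata = cmedv makes the sampling stratified, so the counts are approximate):

# Optional sanity check: roughly 70% of rows in training, 30% in test
nrow(boston_train) / nrow(boston)
nrow(boston_test) / nrow(boston)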
  2. Create a recipe that will model cmedv as a function of all predictor variables. Apply the following feature engineering steps in this order:
     1. Normalize all numeric predictor variables using a Yeo-Johnson transformation
     2. Standardize all numeric predictor variables
boston_recipe <- recipe(cmedv ~ ., data = boston_train) %>%
  step_YeoJohnson(all_numeric_predictors()) %>%   # normalize
  step_normalize(all_numeric_predictors())        # standardize (center & scale)
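
The recipe is only a blueprint at this point; tune_grid() will estimate it within each resample. To eyeball the engineered predictors (purely diagnostic), you can estimate and apply it manually:

# Optional: estimate the recipe on the training data and inspect the result
boston_recipe %>%
  prep() %>%
  bake(new_data = NULL) %>%
  glimpse()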
  3. Create a 5-fold cross-validation resampling object.
set.seed(123)
kfolds <- vfold_cv(boston_train, v = 5, strata = cmedv)
  4. Create a regularized regression model object that:
     1. Contains tuning placeholders for the mixture and penalty arguments
     2. Sets the engine to use the glmnet package.
reg_mod <- linear_reg(mixture = tune(), penalty = tune()) %>%
  set_engine("glmnet")
  5. Create our hyperparameter search grid that:
     1. Searches across default values for mixture
     2. Searches across values ranging from -10 to 5 (on the log10 scale) for penalty
     3. Will search across 10 values for each of these hyperparameters (levels)

reg_grid <- grid_regular(mixture(), penalty(range = c(-10,5)), levels = 10)
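
Because penalty() is tuned on the log10 scale, range = c(-10, 5) translates to penalty values from 1e-10 to 1e+05. A quick peek confirms the grid holds the 10 x 10 = 100 candidates that show up in the metrics table below:

# Optional: 100 candidate combinations; penalty spans 10^-10 to 10^5
nrow(reg_grid)
range(reg_grid$penalty)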
  6. Create a workflow object that combines our recipe object with our model object.
boston_wf <- workflow() %>%
  add_recipe(boston_recipe) %>%
  add_model(reg_mod)
  7. Perform a hyperparameter search.
tuning_results <- boston_wf %>%
  tune_grid(resamples = kfolds, grid = reg_grid)
  8. Assess the results.
tuning_results %>%
  collect_metrics() %>%
  filter(.metric == "rmse") %>%
  arrange(mean)
## # A tibble: 100 × 8
##          penalty mixture .metric .estimator  mean     n std_err .config         
##            <dbl>   <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>           
##  1 0.0215          1     rmse    standard    4.46     5   0.235 Preprocessor1_M…
##  2 0.0215          0.889 rmse    standard    4.46     5   0.235 Preprocessor1_M…
##  3 0.0215          0.778 rmse    standard    4.46     5   0.235 Preprocessor1_M…
##  4 0.0215          0.667 rmse    standard    4.47     5   0.234 Preprocessor1_M…
##  5 0.0215          0.556 rmse    standard    4.47     5   0.234 Preprocessor1_M…
##  6 0.0215          0.444 rmse    standard    4.47     5   0.234 Preprocessor1_M…
##  7 0.0000000001    0.667 rmse    standard    4.47     5   0.233 Preprocessor1_M…
##  8 0.00000000464   0.667 rmse    standard    4.47     5   0.233 Preprocessor1_M…
##  9 0.000000215     0.667 rmse    standard    4.47     5   0.233 Preprocessor1_M…
## 10 0.00001         0.667 rmse    standard    4.47     5   0.233 Preprocessor1_M…
## # … with 90 more rows
autoplot(tuning_results)
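
As a shortcut, show_best() returns the same top candidates without the manual filter()/arrange() chain:

# Optional: equivalent shortcut for the lowest-RMSE candidates
show_best(tuning_results, metric = "rmse")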

  9. Finalize our workflow object with the optimal hyperparameter values.
best_hyperparameters <- select_best(tuning_results, metric = "rmse")

final_wf <- workflow() %>%
  add_recipe(boston_recipe) %>%
  add_model(reg_mod) %>%
  finalize_workflow(best_hyperparameters)
  10. Fit our final workflow object across the full training set data.
final_fit <- final_wf %>%
  fit(data = boston_train)
  11. Plot the top 10 most influential features.
final_fit %>%
  extract_fit_parsnip() %>%
  vip(num_features = 10)
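
Note that boston_test has not been touched yet. The prompts stop here, but a natural last step (sketched below; final_boston_fit is an illustrative name) is last_fit(), which refits the finalized workflow on the full training set and scores it once on the held-out test set:

# Optional: one-shot refit on training data + evaluation on the test set
final_boston_fit <- last_fit(final_wf, split)
collect_metrics(final_boston_fit)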

# Part 2: Tuning a Regularized Classification Model

  1. Split the data into a training set and test set using a 70/30 split. Be sure to include set.seed(123) so that your train and test sets match mine.
library(kernlab)
data(spam)
set.seed(123) # for reproducibility
split <- initial_split(spam, prop = .7, strata = type)
spam_train <- training(split)
spam_test <- testing(split)
  2. Create a recipe that will model type as a function of all predictor variables. Apply the following feature engineering steps in this order:
     1. Normalize all numeric predictor variables using a Yeo-Johnson transformation
     2. Standardize all numeric predictor variables
spam_recipe <- recipe(type ~ ., data = spam_train) %>%
  step_YeoJohnson(all_numeric_predictors()) %>%   # normalize
  step_normalize(all_numeric_predictors())        # standardize (center & scale)
  3. Create a 5-fold cross-validation resampling object.
set.seed(123)
kfolds <- vfold_cv(spam_train, v = 5, strata = type)
  4. Create a regularized logistic regression model object that:
     1. Contains tuning placeholders for the mixture and penalty arguments
     2. Sets the engine to use the glmnet package.
     3. Sets the mode to be a classification model.
logit_mod <- logistic_reg(mixture = tune(), penalty = tune()) %>%
  set_engine("glmnet") %>%
  set_mode("classification")
  5. Create our hyperparameter search grid that:
     1. Searches across default values for mixture
     2. Searches across values ranging from -10 to 5 (on the log10 scale) for penalty
     3. Will search across 10 values for each of these hyperparameters (levels)
logit_grid <- grid_regular(mixture(), penalty(range = c(-10,5)), levels = 10)
  6. Create a workflow object that combines our recipe object with our model object.
spam_wf <- workflow() %>%
  add_recipe(spam_recipe) %>%
  add_model(logit_mod)
  7. Perform a hyperparameter search.
tuning_results <- spam_wf %>%
  tune_grid(resamples = kfolds, grid = logit_grid)
  8. Assess the results.
tuning_results %>%
  collect_metrics() %>%
  filter(.metric == "roc_auc") %>%
  arrange(desc(mean))
## # A tibble: 100 × 8
##         penalty mixture .metric .estimator  mean     n std_err .config          
##           <dbl>   <dbl> <chr>   <chr>      <dbl> <int>   <dbl> <chr>            
##  1 0.000464       1     roc_auc binary     0.979     5 0.00352 Preprocessor1_Mo…
##  2 0.000464       0.889 roc_auc binary     0.979     5 0.00349 Preprocessor1_Mo…
##  3 0.000464       0.778 roc_auc binary     0.979     5 0.00347 Preprocessor1_Mo…
##  4 0.000464       0.667 roc_auc binary     0.979     5 0.00344 Preprocessor1_Mo…
##  5 0.000464       0.222 roc_auc binary     0.979     5 0.00329 Preprocessor1_Mo…
##  6 0.000464       0.333 roc_auc binary     0.979     5 0.00333 Preprocessor1_Mo…
##  7 0.000464       0.556 roc_auc binary     0.979     5 0.00342 Preprocessor1_Mo…
##  8 0.000464       0.111 roc_auc binary     0.979     5 0.00328 Preprocessor1_Mo…
##  9 0.000464       0.444 roc_auc binary     0.979     5 0.00339 Preprocessor1_Mo…
## 10 0.0000000001   0.111 roc_auc binary     0.979     5 0.00335 Preprocessor1_Mo…
## # … with 90 more rows
autoplot(tuning_results)

  9. Finalize our workflow object with the optimal hyperparameter values.
best_hyperparameters <- select_best(tuning_results, metric = "roc_auc")

final_wf <- workflow() %>%
  add_recipe(spam_recipe) %>%
  add_model(logit_mod) %>%
  finalize_workflow(best_hyperparameters)
  10. Fit our final workflow object across the full training set data.
final_fit <- final_wf %>%
  fit(data = spam_train)
  11. Plot the top 10 most influential features.
final_fit %>%
  extract_fit_parsnip() %>%
  vip(num_features = 10)
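
As in Part 1, a held-out evaluation is optional but cheap. For a classification model, last_fit() reports metrics such as accuracy and ROC AUC (final_spam_fit is an illustrative name):

# Optional: score the finalized model on the held-out test set
final_spam_fit <- last_fit(final_wf, split)
collect_metrics(final_spam_fit)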

# Part 3: Tuning a MARS Classification Model

library(kernlab)
data(spam)
  1. Split the data into a training set and test set using a 70/30 split. Be sure to include set.seed(123) so that your train and test sets match mine.
set.seed(123) # for reproducibility
split <- initial_split(spam, prop = .7, strata = type)
spam_train <- training(split)
spam_test <- testing(split)
  2. Create a recipe that will model type as a function of all predictor variables. Apply the following feature engineering steps in this order:
     1. Normalize all numeric predictor variables using a Yeo-Johnson transformation
     2. Standardize all numeric predictor variables
spam_recipe <- recipe(type ~ ., data = spam_train) %>%
  step_YeoJohnson(all_numeric_predictors()) %>%   # normalize
  step_normalize(all_numeric_predictors())        # standardize (center & scale)
  3. Create a 5-fold cross-validation resampling object.
set.seed(123)
kfolds <- vfold_cv(spam_train, v = 5, strata = type)
  4. Create a MARS model object that:
     1. Contains tuning placeholders for the num_terms and prod_degree arguments
     2. Sets the mode to be a classification model.
mars_mod <- mars(num_terms = tune(), prod_degree = tune()) %>%
  set_engine("earth") %>%   # earth is the default engine; stated explicitly here
  set_mode("classification")
  5. Create our hyperparameter search grid that:
     1. Searches across values ranging from 1 to 30 for num_terms
     2. Searches across default values for prod_degree
     3. Will search across 25 values for each of these hyperparameters (levels)
mars_grid <- grid_regular(num_terms(range = c(1,30)), prod_degree(), levels = 25)
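
prod_degree() can only take the values 1 (additive terms) and 2 (two-way interactions), so even with levels = 25 the regular grid contains 25 x 2 = 50 candidates, which is why the metrics table below has 50 rows:

# Optional: 25 num_terms values x 2 prod_degree values = 50 candidates
nrow(mars_grid)
distinct(mars_grid, prod_degree)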
  6. Create a workflow object that combines our recipe object with our model object.
spam_wf <- workflow() %>%
  add_recipe(spam_recipe) %>%
  add_model(mars_mod)
  7. Perform a hyperparameter search.
tuning_results <- spam_wf %>%
  tune_grid(resamples = kfolds, grid = mars_grid)
  8. Assess the results.
tuning_results %>%
  collect_metrics() %>%
  filter(.metric == "roc_auc") %>%
  arrange(desc(mean))
## # A tibble: 50 × 8
##    num_terms prod_degree .metric .estimator  mean     n std_err .config         
##        <int>       <int> <chr>   <chr>      <dbl> <int>   <dbl> <chr>           
##  1        25           2 roc_auc binary     0.975     5 0.00339 Preprocessor1_M…
##  2        26           2 roc_auc binary     0.975     5 0.00339 Preprocessor1_M…
##  3        27           2 roc_auc binary     0.975     5 0.00339 Preprocessor1_M…
##  4        28           2 roc_auc binary     0.975     5 0.00339 Preprocessor1_M…
##  5        30           2 roc_auc binary     0.975     5 0.00339 Preprocessor1_M…
##  6        22           2 roc_auc binary     0.975     5 0.00339 Preprocessor1_M…
##  7        20           2 roc_auc binary     0.975     5 0.00334 Preprocessor1_M…
##  8        21           2 roc_auc binary     0.975     5 0.00328 Preprocessor1_M…
##  9        23           2 roc_auc binary     0.975     5 0.00335 Preprocessor1_M…
## 10        19           2 roc_auc binary     0.975     5 0.00330 Preprocessor1_M…
## # … with 40 more rows

autoplot(tuning_results)

  9. Finalize our workflow object with the optimal hyperparameter values.
best_hyperparameters <- select_best(tuning_results, metric = "roc_auc")

final_wf <- workflow() %>%
  add_recipe(spam_recipe) %>%
  add_model(mars_mod) %>%
  finalize_workflow(best_hyperparameters)
  10. Fit our final workflow object across the full training set data.
final_fit <- final_wf %>%
  fit(data = spam_train)
  11. Plot the top 10 most influential features.
final_fit %>%
  extract_fit_parsnip() %>%
  vip(num_features = 10)
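
And, as in the earlier parts, an optional held-out evaluation of the finalized MARS workflow (final_mars_fit is an illustrative name):

# Optional: evaluate the finalized MARS workflow on the test set
final_mars_fit <- last_fit(final_wf, split)
collect_metrics(final_mars_fit)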