# Install only the packages that are not yet available, so that re-running the
# script does not re-download and reinstall everything on every execution.
required_pkgs <- c("tidymodels", "ISLR", "glmnet", "readr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
## Installing packages into '/cloud/lib/x86_64-pc-linux-gnu-library/4.4'
## (as 'lib' is unspecified)
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ broom 1.0.8 ✔ recipes 1.3.0
## ✔ dials 1.4.0 ✔ rsample 1.3.0
## ✔ dplyr 1.1.4 ✔ tibble 3.2.1
## ✔ ggplot2 3.5.2 ✔ tidyr 1.3.1
## ✔ infer 1.0.8 ✔ tune 1.3.0
## ✔ modeldata 1.4.0 ✔ workflows 1.2.0
## ✔ parsnip 1.3.1 ✔ workflowsets 1.1.0
## ✔ purrr 1.0.4 ✔ yardstick 1.3.2
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ recipes::step() masks stats::step()
library(ISLR)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-8
# Load the Hitters data set and keep only the rows without missing values.
data(Hitters)
Hitters <- na.omit(Hitters)

# Fix the RNG seed so the train/test partition is reproducible, then hold out
# 25% of the rows for testing.
set.seed(123)
data_split <- Hitters %>%
  initial_split(prop = 0.75)
train_data <- data_split %>% training()
test_data <- data_split %>% testing()
# Preprocessing recipe: model Salary on every other column of the training set.
hitters_recipe <- recipe(Salary ~ ., data = train_data) %>%
  # Encode nominal predictors as dummy variables.
  step_dummy(all_nominal_predictors()) %>%
  # Drop zero-variance predictors.
  step_zv(all_predictors()) %>%
  # Normalize all remaining predictors.
  step_normalize(all_predictors())
# Penalized linear regression specs fitted with glmnet. The penalty is left as
# a tuning placeholder; `mixture` selects the penalty type.
# mixture = 0 -> ridge
ridge_model <- set_engine(
  linear_reg(penalty = tune(), mixture = 0),
  "glmnet"
)
# mixture = 1 -> lasso
lasso_model <- set_engine(
  linear_reg(penalty = tune(), mixture = 1),
  "glmnet"
)
# 10-fold cross-validation resamples drawn from the training set.
folds <- train_data %>%
  vfold_cv(v = 10)

# Bundle the shared recipe with each model specification.
ridge_wf <- workflow() %>%
  add_recipe(hitters_recipe) %>%
  add_model(ridge_model)

lasso_wf <- workflow() %>%
  add_recipe(hitters_recipe) %>%
  add_model(lasso_model)
# Tune the penalty of each model over the cross-validation folds.
#
# An integer `grid` draws candidates from the default `penalty()` range, which
# tops out at 1. Salary is on a scale of hundreds, so penalties that small
# apply essentially no shrinkage and the search never reaches the interesting
# region. Explicit regular grids over wider log10 ranges are used instead, with
# the same number of candidates (50) as before.
ridge_grid <- grid_regular(penalty(range = c(-5, 5)), levels = 50)
lasso_grid <- grid_regular(penalty(range = c(-2, 2)), levels = 50)

ridge_results <- tune_grid(
  ridge_wf,
  resamples = folds,
  grid = ridge_grid
)
lasso_results <- tune_grid(
  lasso_wf,
  resamples = folds,
  grid = lasso_grid
)
# Plot the tuning results for the ridge model.
ridge_results %>% autoplot()

# Plot the tuning results for the lasso model.
lasso_results %>% autoplot()

# Ridge: pick the penalty with the best RMSE, plug it into the workflow, and
# refit on the full training set.
best_ridge <- ridge_results %>% select_best(metric = "rmse")
final_ridge <- ridge_wf %>% finalize_workflow(best_ridge)
ridge_fit <- final_ridge %>% fit(data = train_data)

# Lasso: same procedure, using the lasso tuning results and workflow.
best_lasso <- lasso_results %>% select_best(metric = "rmse")
final_lasso <- lasso_wf %>% finalize_workflow(best_lasso)
lasso_fit <- final_lasso %>% fit(data = train_data)