# Install only the packages that are not already available. An unconditional
# install.packages() call re-downloads (and may silently upgrade) every
# package each time the script is run.
required_pkgs <- c("tidymodels", "ISLR", "glmnet", "readr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(tidymodels)
## ── Attaching packages ────────────────────────────────────── tidymodels 1.3.0 ──
## ✔ broom        1.0.8     ✔ recipes      1.3.0
## ✔ dials        1.4.0     ✔ rsample      1.3.0
## ✔ dplyr        1.1.4     ✔ tibble       3.2.1
## ✔ ggplot2      3.5.2     ✔ tidyr        1.3.1
## ✔ infer        1.0.8     ✔ tune         1.3.0
## ✔ modeldata    1.4.0     ✔ workflows    1.2.0
## ✔ parsnip      1.3.1     ✔ workflowsets 1.1.0
## ✔ purrr        1.0.4     ✔ yardstick    1.3.2
## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ recipes::step()  masks stats::step()
library(ISLR)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-8
# Load the Hitters data set and drop every row that contains a missing value.
data(Hitters)
Hitters <- Hitters %>% na.omit()

# Fix the RNG state so the random partition below is reproducible, then
# put 75% of the rows in the training set and the remainder in the test set.
set.seed(123)
data_split <- Hitters %>% initial_split(prop = 0.75)
train_data <- data_split %>% training()
test_data <- data_split %>% testing()
# Preprocessing for both penalised models, declared on the training data:
# Salary is the outcome and every other column is a predictor.
hitters_recipe <- recipe(Salary ~ ., data = train_data)
# Encode nominal predictors as dummy variables.
hitters_recipe <- step_dummy(hitters_recipe, all_nominal_predictors())
# Drop predictors that have zero variance.
hitters_recipe <- step_zv(hitters_recipe, all_predictors())
# Centre and scale all predictors so the penalty treats them comparably.
hitters_recipe <- step_normalize(hitters_recipe, all_predictors())
# Ridge regression: glmnet with mixture = 0; the penalty is left to be tuned.
ridge_model <- set_engine(
  linear_reg(penalty = tune(), mixture = 0),
  engine = "glmnet"
)

# Lasso regression: glmnet with mixture = 1; the penalty is left to be tuned.
lasso_model <- set_engine(
  linear_reg(penalty = tune(), mixture = 1),
  engine = "glmnet"
)
# 10-fold cross-validation resamples of the training set, used for tuning
# both models.
folds <- train_data %>% vfold_cv(v = 10)

# Ridge workflow: model specification plus the shared preprocessing recipe.
ridge_wf <- workflow()
ridge_wf <- add_model(ridge_wf, ridge_model)
ridge_wf <- add_recipe(ridge_wf, hitters_recipe)

# Lasso workflow: same recipe, lasso model specification.
lasso_wf <- workflow()
lasso_wf <- add_model(lasso_wf, lasso_model)
lasso_wf <- add_recipe(lasso_wf, hitters_recipe)
# Candidate penalties for tuning.
#
# An integer `grid = 50` draws candidates from the default range of
# dials::penalty(), which is 10^-10 to 10^0. The outcome (Salary) is modelled
# on its original scale, so useful amounts of regularisation lie well above 1
# and the default range barely changes the fit from one candidate to the
# next. Explicit grids on the log10 scale cover the relevant region instead.
ridge_grid <- grid_regular(penalty(range = c(-5, 5)), levels = 50)
lasso_grid <- grid_regular(penalty(range = c(-2, 2)), levels = 50)

# Evaluate every candidate penalty on the cross-validation folds.
ridge_results <- tune_grid(
  ridge_wf,
  resamples = folds,
  grid = ridge_grid
)

lasso_results <- tune_grid(
  lasso_wf,
  resamples = folds,
  grid = lasso_grid
)
# Plot the resampled performance metrics against the penalty for ridge
ridge_results %>% autoplot()

# Same plot for the lasso tuning results
lasso_results %>% autoplot()

# Ridge: take the penalty with the best cross-validated RMSE, plug it into
# the workflow in place of tune(), and refit on the full training set.
best_ridge <- ridge_results %>% select_best(metric = "rmse")
final_ridge <- ridge_wf %>% finalize_workflow(best_ridge)
ridge_fit <- final_ridge %>% fit(data = train_data)

# Lasso: same procedure, using the lasso tuning results and workflow.
best_lasso <- lasso_results %>% select_best(metric = "rmse")
final_lasso <- lasso_wf %>% finalize_workflow(best_lasso)
lasso_fit <- final_lasso %>% fit(data = train_data)