2.

a.

The correct answer is iii. The lasso adds an L1 penalty to the least squares objective, making it less flexible than least squares; this reduces variance at the cost of some bias, so prediction accuracy improves when the decrease in variance outweighs the increase in bias.
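
As a reference point, the objective the lasso minimizes is the residual sum of squares plus an L1 penalty:

$$\min_{\beta}\;\sum_{i=1}^{n}\Big(y_i - \beta_0 - \sum_{j=1}^{p}\beta_j x_{ij}\Big)^2 \;+\; \lambda\sum_{j=1}^{p}\lvert \beta_j \rvert$$

where larger values of lambda shrink the coefficients more aggressively.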

b.

The correct answer is iii. Ridge regression adds an L2 penalty to the least squares objective, which also makes it less flexible than least squares. It shrinks coefficients toward zero, but (unlike the lasso) not exactly to zero, and it likewise improves prediction accuracy when the decrease in variance outweighs the increase in bias.
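
Ridge minimizes the analogous criterion with a squared (L2) penalty, which shrinks but never zeroes the coefficients:

$$\min_{\beta}\;\sum_{i=1}^{n}\Big(y_i - \beta_0 - \sum_{j=1}^{p}\beta_j x_{ij}\Big)^2 \;+\; \lambda\sum_{j=1}^{p}\beta_j^{2}$$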

c.

The correct answer is i. Non-linear methods are more flexible than linear models, so they have lower bias but higher variance; they improve prediction accuracy when the decrease in bias outweighs the increase in variance.
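
This trade-off is captured by the standard bias-variance decomposition of the expected test error at a point $x_0$:

$$E\big[(y_0 - \hat{f}(x_0))^2\big] = \mathrm{Var}\big(\hat{f}(x_0)\big) + \big[\mathrm{Bias}\big(\hat{f}(x_0)\big)\big]^2 + \mathrm{Var}(\varepsilon)$$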

9.

a.

library(ISLR2)
library(tidyverse)
library(caret)
set.seed(1)
train_index <- createDataPartition(College$Apps, p = 0.7, list = FALSE)
train <- College[train_index, ]
test <- College[-train_index, ]
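
A quick sanity check on the partition (output not shown) confirms the split is roughly 70/30:

nrow(train) / nrow(College)  # should be about 0.70
nrow(test) / nrow(College)   # should be about 0.30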

b.

lm_fit <- lm(Apps ~ ., data = train)
lm_pred <- predict(lm_fit, newdata = test)
lm_mse <- mean((lm_pred - test$Apps)^2)
lm_mse  
## [1] 921637.5

c.

install.packages("glmnet")
## Installing package into 'C:/Users/austr/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'glmnet' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'glmnet'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\austr\AppData\Local\R\win-library\4.4\00LOCK\glmnet\libs\x64\glmnet.dll
## to C:\Users\austr\AppData\Local\R\win-library\4.4\glmnet\libs\x64\glmnet.dll:
## Permission denied
## Warning: restored 'glmnet'
## 
## The downloaded binary packages are in
##  C:\Users\austr\AppData\Local\Temp\RtmpMv8wNb\downloaded_packages
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.4.3
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-8
x_train <- model.matrix(Apps ~ ., data = train)[, -1]
y_train <- train$Apps
x_test <- model.matrix(Apps ~ ., data = test)[, -1]
y_test <- test$Apps

set.seed(1)
ridge_cv <- cv.glmnet(x_train, y_train, alpha = 0)
best_lambda_ridge <- ridge_cv$lambda.min

ridge_pred <- predict(ridge_cv, s = best_lambda_ridge, newx = x_test)
ridge_mse <- mean((ridge_pred - y_test)^2)
ridge_mse
## [1] 1032561

The test error value is 1032561, which is higher than the least squares test error from part (b).
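
As an optional robustness check (not part of the original analysis), cv.glmnet also records lambda.1se, the largest lambda whose CV error is within one standard error of the minimum; output not shown:

ridge_pred_1se <- predict(ridge_cv, s = ridge_cv$lambda.1se, newx = x_test)
mean((ridge_pred_1se - y_test)^2)  # test MSE under the more regularized fit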

d.

set.seed(1)
lasso_cv <- cv.glmnet(x_train, y_train, alpha = 1)
best_lambda_lasso <- lasso_cv$lambda.min

lasso_pred <- predict(lasso_cv, s = best_lambda_lasso, newx = x_test)
lasso_mse <- mean((lasso_pred - y_test)^2)
lasso_mse
## [1] 924712.9
lasso_coef <- predict(lasso_cv, s = best_lambda_lasso, type = "coefficients")
nonzero_count <- sum(lasso_coef != 0) - 1  
nonzero_count
## [1] 17

The test error value is 924712.9, and there are 17 non-zero coefficient estimates, so at lambda.min the lasso keeps every predictor.
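
The kept predictors can be listed directly from the sparse coefficient matrix (a quick check; output not shown):

rownames(lasso_coef)[as.vector(lasso_coef != 0)]  # non-zero coefficients, including the intercept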

e.

install.packages("pls")
## Installing package into 'C:/Users/austr/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'pls' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\austr\AppData\Local\Temp\RtmpMv8wNb\downloaded_packages
library(pls)
## Warning: package 'pls' was built under R version 4.4.3
## 
## Attaching package: 'pls'
## The following object is masked from 'package:caret':
## 
##     R2
## The following object is masked from 'package:stats':
## 
##     loadings
set.seed(1)
pcr_fit <- pcr(Apps ~ ., data = train, scale = TRUE, validation = "CV")
summary(pcr_fit)
## Data:    X dimension: 545 17 
##  Y dimension: 545 1
## Fit method: svdpc
## Number of components considered: 17
## 
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
##        (Intercept)  1 comps  2 comps  3 comps  4 comps  5 comps  6 comps
## CV            4012     3956     2197     2198     1862     1787     1774
## adjCV         4012     3956     2194     2195     1850     1777     1768
##        7 comps  8 comps  9 comps  10 comps  11 comps  12 comps  13 comps
## CV        1745     1741     1663      1653      1675      1677      1692
## adjCV     1734     1733     1657      1646      1669      1670      1685
##        14 comps  15 comps  16 comps  17 comps
## CV         1687      1572      1268      1229
## adjCV      1683      1542      1257      1220
## 
## TRAINING: % variance explained
##       1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps  8 comps
## X      30.774    56.86    63.89    69.96    75.50    80.34    83.92    87.39
## Apps    3.484    71.35    71.53    80.87    82.05    82.35    83.27    83.42
##       9 comps  10 comps  11 comps  12 comps  13 comps  14 comps  15 comps
## X       90.39     92.72     94.89     96.66     97.87     98.70     99.34
## Apps    84.72     84.91     84.95     84.97     84.97     85.21     91.17
##       16 comps  17 comps
## X        99.85    100.00
## Apps     92.67     92.95
validationplot(pcr_fit, val.type = "MSEP")

best_m_pcr <- which.min(pcr_fit$validation$PRESS)

pcr_pred <- predict(pcr_fit, newdata = test, ncomp = best_m_pcr)
pcr_mse <- mean((pcr_pred - test$Apps)^2)
pcr_mse
## [1] 921637.5

The test error value is 921637.5, and the value of M selected by cross-validation is 17. Since M = 17 retains all of the components, PCR performs no dimension reduction here and is equivalent to the full least squares fit, which is why the test error matches part (b).
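
That equivalence can be verified directly against lm_pred from part (b) (a quick check; output not shown):

all.equal(as.vector(pcr_pred), unname(lm_pred), tolerance = 1e-6)  # expect TRUE up to numerical error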

f.

set.seed(1)
pls_fit <- plsr(Apps ~ ., data = train, scale = TRUE, validation = "CV")
summary(pls_fit)
## Data:    X dimension: 545 17 
##  Y dimension: 545 1
## Fit method: kernelpls
## Number of components considered: 17
## 
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
##        (Intercept)  1 comps  2 comps  3 comps  4 comps  5 comps  6 comps
## CV            4012     2003     1700     1602     1510     1325     1260
## adjCV         4012     1999     1693     1594     1492     1308     1249
##        7 comps  8 comps  9 comps  10 comps  11 comps  12 comps  13 comps
## CV        1250     1245     1237      1235      1233      1230      1229
## adjCV     1240     1235     1227      1225      1223      1220      1219
##        14 comps  15 comps  16 comps  17 comps
## CV         1229      1229      1229      1229
## adjCV      1219      1220      1220      1220
## 
## TRAINING: % variance explained
##       1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps  8 comps
## X       26.22    38.96    62.40    65.22    67.99    73.15    77.62    81.12
## Apps    76.71    84.67    86.89    90.38    92.42    92.75    92.81    92.85
##       9 comps  10 comps  11 comps  12 comps  13 comps  14 comps  15 comps
## X       83.38     86.63     89.56     90.79     92.46     94.70     96.64
## Apps    92.89     92.92     92.93     92.95     92.95     92.95     92.95
##       16 comps  17 comps
## X        98.78    100.00
## Apps     92.95     92.95
validationplot(pls_fit, val.type = "MSEP")

best_m_pls <- which.min(pls_fit$validation$PRESS)

pls_pred <- predict(pls_fit, newdata = test, ncomp = best_m_pls)
pls_mse <- mean((pls_pred - test$Apps)^2)
pls_mse
## [1] 921438

The test error value is 921438, and the value of M selected by cross-validation is 13.
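
Since the CV error is essentially flat from about 13 components onward, a more parsimonious M could be chosen with pls::selectNcomp, which implements a one-sigma heuristic (an optional check; output not shown):

selectNcomp(pls_fit, method = "onesigma", plot = FALSE)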

g.

data.frame(
  Method = c("Least Squares", "Ridge", "Lasso", "PCR", "PLS"),
  Test_MSE = c(lm_mse, ridge_mse, lasso_mse, pcr_mse, pls_mse)
)
##          Method  Test_MSE
## 1 Least Squares  921637.5
## 2         Ridge 1032561.1
## 3         Lasso  924712.9
## 4           PCR  921637.5
## 5           PLS  921438.0
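
To put these MSE values on an interpretable scale, a test R^2 can be computed for any of the fits; for example, for PLS (a sketch; output not shown):

1 - pls_mse / mean((test$Apps - mean(test$Apps))^2)  # fraction of test variance explained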

I think we can predict the number of applications with good accuracy. The lowest test MSE, 921438.0, comes from PLS, with least squares and PCR following closely, so there is very little difference between the models; only ridge performs noticeably worse.

11.

library(MASS)    # Boston data; note MASS::select masks dplyr::select
library(leaps)   # regsubsets() for best subset selection
library(glmnet)
library(pls)
library(caret)
library(tidyverse)


data(Boston)
set.seed(1)

a.

train_index <- createDataPartition(Boston$crim, p = 0.7, list = FALSE)
train <- Boston[train_index, ]
test <- Boston[-train_index, ]

Best subset selection

regfit_full <- regsubsets(crim ~ ., data = train, nvmax = 13)
reg_summary <- summary(regfit_full)


which.min(reg_summary$bic)
## [1] 2

Test error

# leaps::regsubsets() has no predict() method, so build the test model matrix
# and multiply by the coefficients of the BIC-selected model by hand
test_mat <- model.matrix(crim ~ ., data = test)
coef_best <- coef(regfit_full, id = which.min(reg_summary$bic))
pred_best <- test_mat[, names(coef_best)] %*% coef_best
mse_best <- mean((test$crim - pred_best)^2)
mse_best
## [1] 22.15037
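
This manual step can be wrapped in a small reusable helper, following the usual regsubsets idiom (a sketch; the formula is passed explicitly for simplicity):

predict_regsubsets <- function(object, newdata, id, formula) {
  mat <- model.matrix(formula, newdata)   # design matrix for the new data
  coefi <- coef(object, id = id)          # coefficients of the id-variable model
  mat[, names(coefi)] %*% coefi           # predictions from the selected variables
}
pred_check <- predict_regsubsets(regfit_full, test, which.min(reg_summary$bic), crim ~ .)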

Ridge regression

x_train <- model.matrix(crim ~ ., train)[, -1]
y_train <- train$crim
x_test <- model.matrix(crim ~ ., test)[, -1]
y_test <- test$crim

ridge_cv <- cv.glmnet(x_train, y_train, alpha = 0)
ridge_lambda <- ridge_cv$lambda.min
ridge_pred <- predict(ridge_cv, s = ridge_lambda, newx = x_test)
ridge_mse <- mean((ridge_pred - y_test)^2)
ridge_mse
## [1] 20.51066

Lasso

lasso_cv <- cv.glmnet(x_train, y_train, alpha = 1)
lasso_lambda <- lasso_cv$lambda.min
lasso_pred <- predict(lasso_cv, s = lasso_lambda, newx = x_test)
lasso_mse <- mean((lasso_pred - y_test)^2)
lasso_coef <- predict(lasso_cv, s = lasso_lambda, type = "coefficients")
lasso_nonzero <- sum(lasso_coef != 0) - 1
lasso_mse
## [1] 20.45048
lasso_nonzero
## [1] 12
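
To identify the single predictor the lasso dropped (output not shown):

rownames(lasso_coef)[as.vector(lasso_coef == 0)]  # the zeroed-out coefficient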

PCR

pcr_fit <- pcr(crim ~ ., data = train, scale = TRUE, validation = "CV")
validationplot(pcr_fit, val.type = "MSEP")

best_m_pcr <- which.min(pcr_fit$validation$PRESS)
pcr_pred <- predict(pcr_fit, test, ncomp = best_m_pcr)
pcr_mse <- mean((pcr_pred - test$crim)^2)
pcr_mse
## [1] 21.8159

b.

data.frame(
  Method = c("Best Subset", "Ridge", "Lasso", "PCR"),
  Test_MSE = c(mse_best, ridge_mse, lasso_mse, pcr_mse),
  # note: the PCR entry is M (number of components), not a coefficient count,
  # and the BIC-selected best subset model kept 2 predictors
  Nonzero_Coefficients = c(NA, 13, lasso_nonzero, best_m_pcr)
)
##        Method Test_MSE Nonzero_Coefficients
## 1 Best Subset 22.15037                   NA
## 2       Ridge 20.51066                   13
## 3       Lasso 20.45048                   12
## 4         PCR 21.81590                   13

Out of all the models, the lasso has the best performance, with the lowest test MSE of 20.45. It also reduces the number of predictors used from 13 to 12, which makes the model simpler and more interpretable.

c.

The final model does not use all the features. Since I picked the lasso, it performs variable selection by shrinking some coefficients to exactly zero; here it dropped one of the 13 predictors. This helps prevent overfitting and improves interpretability.