Assignment 5: Linear Model Selection

Question 2

For parts (a) through (c), indicate which of i. through iv. is correct. Justify your answer.
(a). For the lasso regression mostly relies on lamda which is a controlling factor in shrinkage. Therefore the lasso will restrict the size of the regression coefficient. This has a decrease in variance, but an increase in bias. So based on this option (iii) is correct which states:
Less flexible and hence will give improved prediction accuracy when its increase in bias is less than its decrease in variance

(b). For the ridge regression, we have a similar pattern for bias and variance. As variance decreases, the bias will increase as the coefficient goes to 0. This once again makes it less flexible. So here option (iii) is correct. Same as above.

(c). Non linear regression we simply know is more flexible, so option (i) is correct which states: i. More flexible and hence will give improved prediction accuracy when its increase in bias is less than its decrease in variance.

Question 9

9. In this exercise, we will predict the number of applications received using the other variables in the College data set.

(a)

library(ISLR)
library(tidyverse)

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

set.seed(11)
College[, -1] <- apply(College[, -1], 2, scale)
train.size <- dim(College)[1]/2
train <- sample(1:dim(College)[1], train.size)
test <- train
College.train <- College[train, ]
College.test <- College[test, ]

(b)

lm.fit <- lm(Apps ~ ., data=College.train)
lm.pred <- predict(lm.fit, College.test)
mean((College.test[, "Apps"]-lm.pred)^2)

## [1] 0.08087712

Here our RSS is 0.08087712

(c)

library(glmnet)

## Warning: package 'glmnet' was built under R version 4.1.3

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Loaded glmnet 4.1-3

train_mat = model.matrix(Apps~., data=College.train)[,-1]
train.mat = model.matrix(Apps~., data=College.train)[,-1]
train.mat = model.matrix(Apps~., data=College.train)[,-1]
test.mat = model.matrix(Apps~., data=College.test)[,-1]
grid = 10^seq(10, -5, length=1000)
mod.ridge = cv.glmnet(train.mat, College.train$Apps, alpha=0, lambda=grid, thresh=1e-12)

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

lambda.best <- mod.ridge$lambda.min
lambda.best

## [1] 1.035178e-05

ridge.pred <- predict(mod.ridge, newx=test.mat, s=lambda.best)

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

ridge.mse=mean((College.test$Apps - ridge.pred)^2)
ridge.mse

## [1] 0.08087712

(d)

mod.lasso = cv.glmnet(train.mat, College.train$Apps, alpha=1, lambda=grid, thresh=1e-12)

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

lambda.best = mod.lasso$lambda.min
lambda.best

## [1] 1.035178e-05

lasso.pred = predict(mod.lasso, newx=test.mat, s=lambda.best)

## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values

lasso.mse=mean((College.test$Apps - lasso.pred)^2)
lasso.mse

## [1] 0.08087714

(e)

library(pls)

## Warning: package 'pls' was built under R version 4.1.3

## 
## Attaching package: 'pls'

## The following object is masked from 'package:stats':
## 
##     loadings

pcr.fit = pcr(Apps~., data=College.train, scale=T, validation="CV")
pcr.pred = predict(pcr.fit, College.test, ncomp=9)
pcr.mse=mean((College.test$Apps - pcr.pred)^2)
pcr.mse

## [1] 0.1900342

(f)

pls.fit = plsr(Apps~., data=College.train, scale=T, validation="CV")
pls.pred = predict(pls.fit, College.test, ncomp=5)
pls.mse=mean((College.test$Apps - pls.pred)^2)
pls.mse

## [1] 0.09188887

Question 11

library(leaps)

## Warning: package 'leaps' was built under R version 4.1.3

library(MASS)

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

set.seed(1)
predict.regsubsets = function(object, newdata, id, ...) {
form = as.formula(object$call[[2]])
mat = model.matrix(form, newdata)
coefi = coef(object, id = id)
mat[, names(coefi)] %*% coefi
}
k = 10
p = ncol(Boston) - 1
folds = sample(rep(1:k, length = nrow(Boston)))
cv.errors = matrix(NA, k, p)
for (i in 1:k) {
best.fit = regsubsets(crim ~ ., data = Boston[folds != i, ], nvmax = p)
for (j in 1:p) {
pred = predict(best.fit, Boston[folds == i, ], id = j)
cv.errors[i, j] = mean((Boston$crim[folds == i] - pred)^2)
}
}
rmse.cv = sqrt(apply(cv.errors, 2, mean))
plot(rmse.cv, pch = 19, type = "b")

best_rmse=rmse.cv[which.min(rmse.cv)]
best_rmse

## [1] 6.543281

x = model.matrix(crim ~ . - 1, data = Boston)
y = Boston$crim
cv.lasso = cv.glmnet(x, y, type.measure = "mse")
plot(cv.lasso)

lasso_rmse=sqrt(cv.lasso$cvm[cv.lasso$lambda == cv.lasso$lambda.1se])
lasso_rmse

## [1] 7.921353

x = model.matrix(crim ~ . - 1, data = Boston)
y = Boston$crim
cv.ridge = cv.glmnet(x, y, type.measure = "mse", alpha = 0)
plot(cv.ridge)

ridge_rmse=sqrt(cv.ridge$cvm[cv.ridge$lambda == cv.ridge$lambda.1se])
ridge_rmse

## [1] 7.669133

Assignment 5: Linear Model Selection

Andrew Mazza

Question 2

Question 9

Question 11