ch. 6: 2, 9, 11
2.
a) Lasso: option iii. The bias of the lasso increases as lambda increases. Variance and flexibility decrease, as well, under these conditions.
b) Ridge Regression: option iii. Ridge regression operates similarly to the lasso in that its flexibility decreases when variance decreases and, as a trade-off, bias increases.
c) Non-linear methods: option ii. These can be more flexible for certain data sets and better fit to the variables of interest.
9.
a)
library(ISLR)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
data(College)
set.seed(0)
index <- sample(nrow(College),size = nrow(College)*0.7)
train<- College[index,]
test<- College[-index,]
b)
b<- lm(Apps~., train)
summary(b)
##
## Call:
## lm(formula = Apps ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6015.1 -474.1 0.9 354.2 7542.5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.218e+00 5.191e+02 -0.004 0.996593
## PrivateYes -6.108e+02 1.718e+02 -3.555 0.000412 ***
## Accept 1.710e+00 4.897e-02 34.911 < 2e-16 ***
## Enroll -8.801e-01 2.478e-01 -3.552 0.000417 ***
## Top10perc 5.588e+01 6.643e+00 8.412 3.84e-16 ***
## Top25perc -1.748e+01 5.210e+00 -3.355 0.000850 ***
## F.Undergrad 6.624e-03 4.476e-02 0.148 0.882411
## P.Undergrad 7.198e-02 3.766e-02 1.911 0.056534 .
## Outstate -8.188e-02 2.285e-02 -3.583 0.000372 ***
## Room.Board 1.528e-01 5.774e-02 2.646 0.008388 **
## Books 2.298e-01 2.781e-01 0.826 0.408976
## Personal -5.833e-02 7.449e-02 -0.783 0.433950
## PhD -1.225e+01 5.672e+00 -2.159 0.031273 *
## Terminal 1.632e+00 6.307e+00 0.259 0.795888
## S.F.Ratio -7.925e-01 1.668e+01 -0.048 0.962127
## perc.alumni -9.085e-01 5.001e+00 -0.182 0.855919
## Expend 4.656e-02 1.412e-02 3.297 0.001042 **
## Grad.Rate 8.222e+00 3.708e+00 2.218 0.027008 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1052 on 525 degrees of freedom
## Multiple R-squared: 0.9293, Adjusted R-squared: 0.927
## F-statistic: 405.9 on 17 and 525 DF, p-value: < 2.2e-16
b_pred<- predict(b, test)
error<-mean((test$Apps - b_pred)^2)
error
## [1] 1155090
c)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-1
train_mat<-model.matrix(Apps~.,data=train)
test_mat<-model.matrix(Apps~.,data=test)
grid<-10^seq(4,-2,length=100)
ridge<-glmnet(train_mat,train$Apps,alpha=0,lambda=grid,thresh = 1e-12)
cv_ridge<-cv.glmnet(train_mat,train$Apps,alpha=0,lambda=grid,thresh=1e-12)
lam_ridge<-cv_ridge$lambda.min
pred_ridge<- predict(ridge,s=lam_ridge,newx =test_mat)
mean((test$Apps - pred_ridge)^2)
## [1] 1155062
d)
lasso<-glmnet(train_mat,train$Apps,alpha=1,lambda=grid,thresh = 1e-12)
cv_lasso<- cv.glmnet(train_mat,train$Apps,alpha=1,lambda=grid,thresh=1e-12)
lam_lasso<- cv_lasso$lambda.min
pred_lasso<- predict(lasso, s=lam_lasso, newx= test_mat)
mean((test$Apps - pred_lasso)^2)
## [1] 1115179
print(predict(lasso, type = "coefficients", s= lam_lasso))
## 19 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -247.46664595
## (Intercept) .
## PrivateYes -531.29298762
## Accept 1.63644295
## Enroll -0.60645502
## Top10perc 46.96422893
## Top25perc -11.40191119
## F.Undergrad .
## P.Undergrad 0.04796243
## Outstate -0.06486137
## Room.Board 0.14121924
## Books 0.16982773
## Personal -0.03372698
## PhD -9.35950401
## Terminal .
## S.F.Ratio .
## perc.alumni -0.68338296
## Expend 0.04456499
## Grad.Rate 6.25268033
e)
library(pls)
##
## Attaching package: 'pls'
## The following object is masked from 'package:caret':
##
## R2
## The following object is masked from 'package:stats':
##
## loadings
pcrmodel<-pcr(Apps~.,data=train,scale=TRUE,validation="CV")
validationplot(pcrmodel, val.type = "MSEP")
pred_pcr<- predict(pcrmodel, test, ncomp= 17)
mean((test$Apps - pred_pcr)^2)
## [1] 1155090
f)
plsrmodel<-plsr(Apps~.,data=train,scale=TRUE,validation="CV")
validationplot(plsrmodel,val.type="MSEP")
pred_pls<- predict(plsrmodel, test, ncomp= 10)
mean((test$Apps - pred_pls)^2)
## [1] 1196907
g)
test.avg <- mean(test$Apps)
lm_r2 <- 1 - mean((b_pred - test$Apps)^2) / mean((test.avg - test$Apps)^2)
lm_r2
## [1] 0.9204109
ridge_r2 <- 1 - mean((pred_ridge - test$Apps)^2) / mean((test.avg - test$Apps)^2)
ridge_r2
## [1] 0.9204128
lasso_r2 <- 1 - mean((pred_lasso - test$Apps)^2) / mean((test.avg - test$Apps)^2)
lasso_r2
## [1] 0.9231609
pcr_r2 <- 1 - mean((pred_pcr - test$Apps)^2) / mean((test.avg - test$Apps)^2)
pcr_r2
## [1] 0.9204109
pls_r2 <- 1 - mean((pred_pls - test$Apps)^2) / mean((test.avg - test$Apps)^2)
pls_r2
## [1] 0.9175296
The lasso model had the highest r^2 value.
11.
a)
library(MASS)
data(Boston)
set.seed(2021)
index<- sample(Boston$crim, size = nrow(Boston)*0.7)
train<- Boston[-index,]
test<- Boston[index,]
#lm
a<- lm(crim~., train)
pred_a<- predict(a, test)
mean((test$crim - pred_a)^2)
## [1] 7.131272
#ridge
train_mat<-model.matrix(crim~.,data=train)
test_mat<-model.matrix(crim~.,data=test)
grid<-10^seq(4,-2,length=100)
ridge<-glmnet(train_mat,train$crim,alpha=0,lambda=grid,thresh = 1e-12)
cv_ridge<-cv.glmnet(train_mat,train$crim,alpha=0,lambda=grid,thresh=1e-12)
#pcr
pcrmodel<-pcr(crim~.,data=train,scale=TRUE,validation="CV")
validationplot(pcrmodel, val.type = "MSEP")
pred_pcr<- predict(pcrmodel, test, ncomp= 10)
mean((test$crim - pred_pcr)^2)
## [1] 6.468479
summary(pcrmodel)
## Data: X dimension: 481 13
## Y dimension: 481 1
## Fit method: svdpc
## Number of components considered: 13
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 8.798 7.358 7.343 6.944 6.938 6.950 6.985
## adjCV 8.798 7.356 7.341 6.939 6.930 6.944 6.976
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 6.982 6.803 6.873 6.863 6.869 6.825 6.751
## adjCV 6.974 6.791 6.861 6.851 6.857 6.812 6.738
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 48.11 60.85 70.05 76.79 83.29 88.24 91.29 93.58
## crim 30.47 30.87 38.87 39.21 39.24 39.54 39.78 42.49
## 9 comps 10 comps 11 comps 12 comps 13 comps
## X 95.53 97.17 98.48 99.51 100.00
## crim 42.50 42.77 42.79 43.93 45.26
b) I’m choosing the pcr as the final model since it gives the lowest MSE.
c) The pcr is a dimensionally reduced model, so it doesn’t contain all of the variables in their initial form.