Chapter 06 (page 259): 2, 9, 11
library(ISLR)
## Warning: package 'ISLR' was built under R version 3.6.3
# Load the College data and build a reproducible 70/30 train/test split.
data("College")
# Fix the RNG state so the split, and every test error computed from it,
# can be reproduced from run to run.
set.seed(1)
# Use a whole number of training rows; a fractional size would otherwise be
# truncated silently by sample().
training <- floor(nrow(College) * 0.70)
traincollege <- sample(seq_len(nrow(College)), training)
# Negative indices select the rows that were not drawn for training.
testcollege <- -traincollege
train.college <- College[traincollege, ]
test.college <- College[testcollege, ]
The test error (MSE) for the least squares model is 1612476.
# Least squares: regress Apps on every other column of the training set,
# predict on the held-out rows, and report the test mean squared error.
lmodel1 <- lm(Apps ~ ., data = train.college)
lpred1 <- predict(lmodel1, newdata = test.college)
mean((test.college$Apps - lpred1)^2)
## [1] 1671926
Test error for ridge regression is lower than for OLS: 1541281
library(glmnet)
## Warning: package 'glmnet' was built under R version 3.6.3
## Loading required package: Matrix
## Loaded glmnet 4.0-2
# Numeric design matrices for glmnet, built from the same formula for the
# training and test rows.
trainmat <- model.matrix(Apps ~ ., data = train.college)
testmat <- model.matrix(Apps ~ ., data = test.college)
# Candidate penalties, log-spaced from 10^4 down to 10^-2.
grid <- 10^seq(4, -2, length.out = 100)
# Ridge regression (alpha = 0) with the penalty chosen by cross-validation
# over `grid`. The argument has to be spelled `lambda`: the earlier
# misspelling `lamba` did not match it, so `grid` was never used.
ridgemodel <- cv.glmnet(
  trainmat, train.college$Apps,
  alpha = 0, lambda = grid, thresh = 1e-12
)
# Penalty with the smallest cross-validated error.
lambda1 <- ridgemodel$lambda.min
lambda1
## [1] 361.6466
# Predict the held-out rows at the CV-selected penalty and report test MSE.
ridgepredict <- predict(ridgemodel, newx = testmat, s = lambda1)
mean((test.college$Apps - ridgepredict)^2)
## [1] 1664815
The test error for the lasso model is close to that of the ridge model; the lasso fit has 11 non-zero coefficients.
# Lasso (alpha = 1) with the penalty chosen by cross-validation over `grid`.
# The argument has to be spelled `lambda`: the earlier misspelling `lamba`
# did not match it, so `grid` was never used.
lassomodel <- cv.glmnet(
  trainmat, train.college$Apps,
  alpha = 1, lambda = grid, thresh = 1e-12
)
# Penalty with the smallest cross-validated error.
lasso1 <- lassomodel$lambda.min
lasso1
## [1] 2.118172
# Predict the held-out rows at the CV-selected penalty and report test MSE.
lassopredict <- predict(lassomodel, newx = testmat, s = lasso1)
mean((test.college$Apps - lassopredict)^2)
## [1] 1674117
# Coefficients of the lasso fit at the CV-selected penalty; entries printed
# as "." are exactly zero.
coef(lassomodel, s = lasso1)
## 19 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -4.722459e+02
## (Intercept) .
## PrivateYes -3.494994e+02
## Accept 1.656282e+00
## Enroll -1.039283e+00
## Top10perc 3.648514e+01
## Top25perc -5.849045e+00
## F.Undergrad 7.012858e-02
## P.Undergrad 9.448019e-03
## Outstate -8.265634e-02
## Room.Board 1.130324e-01
## Books 2.528740e-02
## Personal 5.669415e-02
## PhD -8.705398e+00
## Terminal .
## S.F.Ratio 1.343523e+01
## perc.alumni .
## Expend 5.731901e-02
## Grad.Rate 6.048788e+00
The test error for the PCR model is identical to that of the OLS model, because all 17 components are used.
library(pls)
## Warning: package 'pls' was built under R version 3.6.3
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
# Principal components regression on standardized predictors, with
# cross-validation so the error can be plotted against component count.
pcrmodel <- pcr(
  Apps ~ .,
  data = train.college, scale = TRUE, validation = "CV"
)
validationplot(pcrmodel, val.type = "MSEP")
# NOTE(review): 17 looks like the full number of components, which would
# make this fit equivalent to least squares; confirm against the plot.
pcrpredict <- predict(pcrmodel, newdata = test.college, ncomp = 17)
mean((test.college$Apps - pcrpredict)^2)
## [1] 1671926
The test error for the PLSR model is close to that of the OLS model.
# Partial least squares on standardized predictors, with cross-validation
# so the error can be plotted against component count.
plsrmodel <- plsr(
  Apps ~ .,
  data = train.college, scale = TRUE, validation = "CV"
)
validationplot(plsrmodel, val.type = "MSEP")
# Predict the held-out rows with 10 components and report test MSE.
plsrpredict <- predict(plsrmodel, newdata = test.college, ncomp = 10)
mean((test.college$Apps - plsrpredict)^2)
## [1] 1641793
Ridge and lasso have the lowest test errors, but the remaining models perform only slightly worse.
Least Square Test: 1612476
Ridge Model Test: 1541281
Lasso Model Test: 1568820
PCR Model Test: 1612476
PLS Model Test: 1614046
library(MASS)
# NOTE(review): attach() is discouraged because it puts the columns on the
# search path. It is kept here in case later code relies on bare column
# names; the statements below always go through `Boston` explicitly.
attach(Boston)
# Fix the RNG state so the 60/40 split can be reproduced.
set.seed(1)
# Use a whole number of training rows; a fractional size would otherwise be
# truncated silently by sample().
training2 <- sample(nrow(Boston), floor(nrow(Boston) * 0.60))
train.boston <- Boston[training2, ]
test.boston <- Boston[-training2, ]
Lasso Model:
# Predictor matrix for crim with the intercept column dropped (`- 1`).
lassomat <- model.matrix(crim ~ . - 1, data = Boston)
# Cross-validated fit at cv.glmnet's default alpha, scored by MSE.
lasso2 <- cv.glmnet(x = lassomat, y = Boston$crim, type.measure = "mse")
plot(lasso2)
# Coefficients at cv.glmnet's default penalty choice.
coef(lasso2)
## 14 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 1.4186415
## zn .
## indus .
## chas .
## nox .
## rm .
## age .
## dis .
## rad 0.2298449
## tax .
## ptratio .
## black .
## lstat .
## medv .
Ridge Regression
# Predictor matrix for crim with the intercept column dropped (`- 1`).
lassomat3 <- model.matrix(crim ~ . - 1, data = Boston)
# Cross-validated ridge fit (alpha = 0), scored by MSE. The fit now uses the
# matrix built just above; previously `lassomat3` was created but unused and
# the call read `lassomat` from the preceding section instead.
lasso3 <- cv.glmnet(lassomat3, Boston$crim, alpha = 0, type.measure = "mse")
plot(lasso3)
# Coefficients at cv.glmnet's default penalty choice.
coef(lasso3)
## 14 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 0.887549642
## zn -0.002684451
## indus 0.035521503
## chas -0.242070223
## nox 2.338520332
## rm -0.166158479
## age 0.007601568
## dis -0.120012848
## rad 0.063692316
## tax 0.002813534
## ptratio 0.090098298
## black -0.003542339
## lstat 0.046779459
## medv -0.030585796
# Cross-validated RMSE of the lasso fit at its lambda.1se penalty.
sqrt(lasso2$cvm[which(lasso2$lambda == lasso2$lambda.1se)])
## [1] 7.572692
# Cross-validated RMSE of the ridge fit at its lambda.1se penalty.
sqrt(lasso3$cvm[which(lasso3$lambda == lasso3$lambda.1se)])
## [1] 7.391118