library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
library(pls)
## Warning: package 'pls' was built under R version 4.0.4
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.4
## Loading required package: Matrix
## Loaded glmnet 4.1-1
library(MASS)
library(leaps)
## Warning: package 'leaps' was built under R version 4.0.4
iii With increasing λ variance starts to decrease faster than bias increases leading to a low in the U shaped MSE curve.
iii With increasing λ variance starts to decrease faster than bias increases leading to a low in the U shaped MSE curve.
ii
data(College)
set.seed(123)
trainid = sample(1:nrow(College), nrow(College)/2)
train = College[trainid,]
test = College[-trainid,]
lm.fit = lm(Apps~., data=train)
lm.pred = predict(lm.fit, test)
lm.err = mean((test$Apps - lm.pred)^2)
lm.err
## [1] 1373995
train.X = model.matrix(Apps ~ ., data = train)
train.Y = train[, "Apps"]
test.X = model.matrix(Apps ~ ., data = test)
test.Y = test[, "Apps"]
grid = 10 ^ seq(4, -2, length=100)
ridge.mod = glmnet(train.X, train.Y, alpha =0, lambda=grid, thresh = 1e-12)
lambda.best = ridge.mod$lambda.min
ridge.pred = predict(ridge.mod, newx= test.X, s=lambda.best)
(ridge.err = mean((test.Y - ridge.pred)^2))
## [1] 2070580
lasso.mod = glmnet(train.X, train.Y, alpha =1, lambda=grid, thresh = 1e-12)
lasso.cv = cv.glmnet(train.X, train.Y, alpha =1, lambda=grid, thresh = 1e-12)
lambda.best = lasso.cv$lambda.min
lasso.pred = predict(lasso.mod, newx= test.X, s=lambda.best)
(lasso.err = mean((test.Y - lasso.pred)^2))
## [1] 1399213
predict(lasso.mod, s = lambda.best, type="coefficients")
## 19 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -448.89015205
## (Intercept) .
## PrivateYes -707.26279699
## Accept 1.33100318
## Enroll .
## Top10perc 26.66029296
## Top25perc .
## F.Undergrad .
## P.Undergrad .
## Outstate -0.02716977
## Room.Board 0.12159181
## Books .
## Personal -0.03546138
## PhD -2.99477563
## Terminal -5.67422601
## S.F.Ratio .
## perc.alumni -6.74346397
## Expend 0.07451472
## Grad.Rate 7.18317440
pcr.fit = pcr(Apps~., data=train, scale=TRUE, validation="CV")
validationplot(pcr.fit, val.type="MSEP")
summary(pcr.fit)
## Data: X dimension: 388 17
## Y dimension: 388 1
## Fit method: svdpc
## Number of components considered: 17
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 3258 3228 1667 1617 1317 1325 1220
## adjCV 3258 3227 1665 1615 1304 1324 1216
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 1211 1213 1159 1161 1161 1166 1173
## adjCV 1212 1213 1156 1158 1158 1163 1170
## 14 comps 15 comps 16 comps 17 comps
## CV 1174 1165 999.5 1014
## adjCV 1170 1162 996.4 1010
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 32.969 59.69 66.68 72.25 77.31 81.82 85.18 88.34
## Apps 3.259 74.43 76.35 84.40 84.42 86.69 86.79 87.01
## 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps 15 comps
## X 91.32 93.61 95.48 97.05 98.11 98.90 99.40
## Apps 88.38 88.39 88.47 88.49 88.50 88.51 88.74
## 16 comps 17 comps
## X 99.81 100.00
## Apps 91.48 91.58
pcr.pred = predict(pcr.fit, test, ncomp=10)
(pcr.err = mean((test$Apps - pcr.pred)^2))
## [1] 2887472
pls.fit = plsr(Apps~., data=train, scale=TRUE, validation="CV")
validationplot(pls.fit, val.type="MSEP")
pls.pred = predict(pls.fit, test, ncomp=10)
(pls.err = mean((test$Apps - pls.pred)^2))
## [1] 1384151
test.avg = mean(test$Apps)
lm.r2 =1 - lm.err/mean((test.avg - test$Apps)^2)
ridge.r2 =1 - ridge.err/mean((test.avg - test$Apps)^2)
lasso.r2 =1 - lasso.err/mean((test.avg - test$Apps)^2)
pcr.r2 =1 - pcr.err/mean((test.avg - test$Apps)^2)
pls.r2 =1 - pls.err/mean((test.avg - test$Apps)^2)
all.r2 = c(lm.r2, ridge.r2, lasso.r2, pcr.r2, pls.r2)
names(all.r2) = c("lm", "ridge", "lasso", "pcr", "pls")
barplot(all.r2 )
very similar results with pcr with a slightly lower accuracy..
data(Boston)
set.seed(12)
predict.regsubsets = function(object, newdata, id, ...) {
form = as.formula(object$call[[2]])
mat = model.matrix(form, newdata)
coefi = coef(object, id = id)
xvars = names(coefi)
mat[, xvars] %*% coefi
}
k = 10
folds = sample(1:k, nrow(Boston), replace = TRUE)
cv.errors = matrix(NA, k, 13, dimnames = list(NULL, paste(1:13)))
for (j in 1:k) {
best.fit = regsubsets(crim ~ ., data = Boston[folds != j, ], nvmax = 13)
for (i in 1:13) {
pred = predict(best.fit, Boston[folds == j, ], id = i)
cv.errors[j, i] = mean((Boston$crim[folds == j] - pred)^2)
}
}
mean.cv.errors = apply(cv.errors, 2, mean)
plot(mean.cv.errors, type = "b", xlab = "Number of variables", ylab = "CV error")
min(mean.cv.errors)
## [1] 45.33533
regfit.best = regsubsets(crim~., data=Boston, nvmax=13)
coef(regfit.best, 12)
## (Intercept) zn indus chas nox
## 16.985713928 0.044673247 -0.063848469 -0.744367726 -10.202169211
## rm dis rad tax ptratio
## 0.439588002 -0.993556631 0.587660185 -0.003767546 -0.269948860
## black lstat medv
## -0.007518904 0.128120290 -0.198877768
x = model.matrix(crim ~ ., Boston)[, -1]
y = Boston$crim
cv.out = cv.glmnet(x, y, alpha = 1, type.measure = "mse")
plot(cv.out)
cv.out
##
## Call: cv.glmnet(x = x, y = y, type.measure = "mse", alpha = 1)
##
## Measure: Mean-Squared Error
##
## Lambda Index Measure SE Nonzero
## min 0.0563 50 43.18 12.10 11
## 1se 3.0758 7 54.76 13.98 1
cv.out = cv.glmnet(x, y, alpha = 0, type.measure = "mse")
cv.out
##
## Call: cv.glmnet(x = x, y = y, type.measure = "mse", alpha = 0)
##
## Measure: Mean-Squared Error
##
## Lambda Index Measure SE Nonzero
## min 0.54 100 43.16 14.93 13
## 1se 74.44 47 58.00 18.58 13
pcr.fit = pcr(crim ~ ., data = Boston, scale = TRUE, validation = "CV")
validationplot(pcr.fit, val.type = "MSEP")
Best subset selection yields the best results.
1 Feature is missing as the model is created by Best subset selection.