#### Like the lasso, ridge regression is less flexible than least squares and will give improved prediction accuracy when its increase in bias is smaller than its decrease in variance.
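#### For reference, ridge and the lasso both minimize a penalized residual sum of squares,
\[\mathrm{RSS} + \lambda \sum_{j=1}^{p} \beta_j^2 \;\;\text{(ridge)}, \qquad \mathrm{RSS} + \lambda \sum_{j=1}^{p} |\beta_j| \;\;\text{(lasso)},\]
#### so larger \(\lambda\) means more shrinkage. The \(\ell_1\) penalty can set coefficients exactly to zero, while the \(\ell_2\) penalty only shrinks them toward zero.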
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.5
set.seed(11)
sum(is.na(College))
## [1] 0
train.size = dim(College)[1] / 2
train = sample(1:dim(College)[1], train.size)
test = -train
College.train = College[train, ]
College.test = College[test, ]
lm.fit = lm(Apps~., data=College.train)
lm.pred = predict(lm.fit, College.test)
mean((College.test[, "Apps"] - lm.pred)^2)
## [1] 1026096
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.5
## Loading required package: Matrix
## Loaded glmnet 4.1-2
train.mat <- model.matrix(Apps ~ ., data = College.train)
test.mat <- model.matrix(Apps ~ ., data = College.test)
grid <- 10 ^ seq(4, -2, length = 100)
fit.ridge <- glmnet(train.mat, College.train$Apps, alpha = 0, lambda = grid, thresh = 1e-12)
cv.ridge <- cv.glmnet(train.mat, College.train$Apps, alpha = 0, lambda = grid, thresh = 1e-12)
bestlam.ridge <- cv.ridge$lambda.min
bestlam.ridge
## [1] 0.01
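#### Note that the selected \(\lambda\) is the smallest value in our grid, so the chosen ridge fit is only lightly penalized. As a sanity check (a small sketch, not part of the original analysis), one could let cv.glmnet pick its own \(\lambda\) sequence and confirm that extending the grid downward does not change the conclusion:
# Sketch: rerun CV with glmnet's default lambda path instead of our grid
cv.ridge.default <- cv.glmnet(train.mat, College.train$Apps, alpha = 0)
cv.ridge.default$lambda.min  # compare with the grid-based choice above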
pred.ridge <- predict(fit.ridge, s = bestlam.ridge, newx = test.mat)
mean((pred.ridge - College.test$Apps)^2)
## [1] 1026069
fit.lasso <- glmnet(train.mat, College.train$Apps, alpha = 1, lambda = grid, thresh = 1e-12)
cv.lasso <- cv.glmnet(train.mat, College.train$Apps, alpha = 1, lambda = grid, thresh = 1e-12)
bestlam.lasso <- cv.lasso$lambda.min
bestlam.lasso
## [1] 0.01
pred.lasso <- predict(fit.lasso, s = bestlam.lasso, newx = test.mat)
mean((pred.lasso - College.test$Apps)^2)
## [1] 1026036
predict(fit.lasso, s = bestlam.lasso, type = "coefficients")
## 19 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 37.86520037
## (Intercept) .
## PrivateYes -551.14946609
## Accept 1.74980812
## Enroll -1.36005786
## Top10perc 65.55655577
## Top25perc -22.52640339
## F.Undergrad 0.10181853
## P.Undergrad 0.01789131
## Outstate -0.08706371
## Room.Board 0.15384585
## Books -0.12227313
## Personal 0.16194591
## PhD -14.29638634
## Terminal -1.03118224
## S.F.Ratio 4.47956819
## perc.alumni -0.45456280
## Expend 0.05618050
## Grad.Rate 9.07242834
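#### At \(\lambda = 0.01\) the penalty is so light that the lasso keeps every predictor; only the redundant intercept column produced by model.matrix is zeroed. A quick way to verify this is to count the zero coefficients:
# Count coefficients set exactly to zero at the chosen lambda
lasso.coef <- predict(fit.lasso, s = bestlam.lasso, type = "coefficients")
sum(lasso.coef == 0)  # 1 here: the duplicated intercept column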
library(pls)
## Warning: package 'pls' was built under R version 4.0.5
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
fit.pcr <- pcr(Apps ~ ., data = College.train, scale = TRUE, validation = "CV")
validationplot(fit.pcr, val.type = "MSEP")
pred.pcr <- predict(fit.pcr, College.test, ncomp = 10)
mean((pred.pcr - College.test$Apps)^2)
## [1] 1867486
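#### The choice of 10 components above is read off the validation plot. For a programmatic choice, pls provides selectNcomp(); the line below is a sketch using its one-standard-error heuristic (the same applies to the PLS fit that follows):
# Sketch: choose ncomp with the one-sigma rule instead of eyeballing the plot
selectNcomp(fit.pcr, method = "onesigma")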
fit.pls <- plsr(Apps ~ ., data = College.train, scale = TRUE, validation = "CV")
validationplot(fit.pls, val.type = "MSEP")
pred.pls <- predict(fit.pls, College.test, ncomp = 10)
mean((pred.pls - College.test$Apps)^2)
## [1] 1031287
test.avg <- mean(College.test$Apps)
lm.r2 <- 1 - mean((lm.pred - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
ridge.r2 <- 1 - mean((pred.ridge - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
lasso.r2 <- 1 - mean((pred.lasso - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
pcr.r2 <- 1 - mean((pred.pcr - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
pls.r2 <- 1 - mean((pred.pls - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
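#### The five test \(R^2\) values are easiest to compare side by side. Judging from the test MSEs above, all models except PCR predict comparably well, with PCR noticeably worse:
# Collect the test R^2 of all five models and plot them together
r2.all <- c(lm = lm.r2, ridge = ridge.r2, lasso = lasso.r2, pcr = pcr.r2, pls = pls.r2)
barplot(r2.all, ylab = "Test R-squared", ylim = c(0, 1))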
set.seed(1)
library(MASS)
library(leaps)
## Warning: package 'leaps' was built under R version 4.0.5
library(glmnet)
data(Boston)
set.seed(1)
# Prediction method for regsubsets objects, which leaps does not provide
predict.regsubsets <- function(object, newdata, id, ...) {
  form <- as.formula(object$call[[2]])  # recover the formula from the regsubsets call
  mat <- model.matrix(form, newdata)    # design matrix for the new data
  coefi <- coef(object, id = id)        # coefficients of the id-variable model
  xvars <- names(coefi)
  mat[, xvars] %*% coefi                # predictions from the selected subset
}
k = 10
folds <- sample(1:k, nrow(Boston), replace = TRUE)
cv.errors <- matrix(NA, k, 13, dimnames = list(NULL, paste(1:13)))
for (j in 1:k) {
  best.fit <- regsubsets(crim ~ ., data = Boston[folds != j, ], nvmax = 13)
  for (i in 1:13) {
    pred <- predict(best.fit, Boston[folds == j, ], id = i)
    cv.errors[j, i] <- mean((Boston$crim[folds == j] - pred)^2)
  }
}
mean.cv.errors <- apply(cv.errors, 2, mean)
plot(mean.cv.errors, type = "b", xlab = "Number of variables", ylab = "CV error")
#### We see that cross-validation selects a 12-variable model, with a CV estimate for the test MSE of 41.0345657. Next we proceed with the lasso.
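#### Both numbers can be read directly off mean.cv.errors:
# Model size with the smallest cross-validated MSE, and that MSE
which.min(mean.cv.errors)
mean.cv.errors[which.min(mean.cv.errors)]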
x <- model.matrix(crim ~ ., Boston)[, -1]
y <- Boston$crim
cv.out <- cv.glmnet(x, y, alpha = 1, type.measure = "mse")
plot(cv.out)
#### Here cross-validation selects a \(\lambda\) equal to 0.0467489. We have a CV estimate for the test MSE equal to 42.134324. Next, we proceed with ridge regression.
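#### Both quantities come straight from the cv.glmnet object:
# Selected lambda and the CV-estimated MSE at that lambda
cv.out$lambda.min
cv.out$cvm[cv.out$lambda == cv.out$lambda.min]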
cv.out <- cv.glmnet(x, y, alpha = 0, type.measure = "mse")
plot(cv.out)
#### Here cross-validation selects a \(\lambda\) equal to 0.5374992. We have a CV estimate for the test MSE equal to 42.9834518. Finally, we proceed with PCR.
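#### (The same extraction works for the ridge fit:)
# Selected lambda and CV MSE for the ridge fit
cv.out$lambda.min
min(cv.out$cvm)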
pcr.fit <- pcr(crim ~ ., data = Boston, scale = TRUE, validation = "CV")
summary(pcr.fit)
## Data: X dimension: 506 13
## Y dimension: 506 1
## Fit method: svdpc
## Number of components considered: 13
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 8.61 7.198 7.198 6.786 6.762 6.790 6.821
## adjCV 8.61 7.195 7.195 6.780 6.753 6.784 6.813
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 6.822 6.689 6.712 6.720 6.712 6.664 6.593
## adjCV 6.812 6.679 6.701 6.708 6.700 6.651 6.580
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 47.70 60.36 69.67 76.45 82.99 88.00 91.14 93.45
## crim 30.69 30.87 39.27 39.61 39.61 39.86 40.14 42.47
## 9 comps 10 comps 11 comps 12 comps 13 comps
## X 95.40 97.04 98.46 99.52 100.0
## crim 42.55 42.78 43.04 44.13 45.4
validationplot(pcr.fit, val.type = "MSEP")
#### Here cross-validation selects \(M = 13\), i.e. all components, so no real dimension reduction is achieved. The CV RMSEP of 6.593 at 13 components corresponds to a CV estimate for the test MSE of about 43.5.
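#### As a check, the CV curve can be extracted from the fit with pls's MSEP(); a small sketch:
# Pull out the cross-validated MSEP and locate its minimum
msep.cv <- MSEP(pcr.fit)$val["CV", 1, ]
which.min(msep.cv) - 1  # minus 1 because the first entry is the intercept-only model
min(msep.cv)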