#2 (A.) (iii) Less flexible, and it will give improved prediction accuracy when its increase in bias is less than its decrease in variance. As lambda increases, the flexibility of the fit decreases, and the estimated coefficients shrink, with some becoming exactly zero. This leads to a substantial decrease in the variance of the predictions for a small increase in bias.
(B.) (iii) Less flexible, and it will give improved prediction accuracy when its increase in bias is less than its decrease in variance. As lambda increases, the flexibility of the fit decreases, and the estimated coefficients shrink toward zero, again trading a small increase in bias for a substantial decrease in the variance of the predictions. However, unlike the lasso, every variable retains a non-zero coefficient.
(C.) (ii) Non-linear methods are generally more flexible, so their predictions tend to have higher variance and lower bias. Predictions will therefore improve when the increase in variance is smaller than the decrease in bias (the bias-variance trade-off).
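#A minimal sketch (beyond the assignment) of the ridge/lasso contrast above: on
#simulated data where only the first two predictors matter, ridge (alpha = 0)
#shrinks all ten coefficients but leaves every one non-zero, while the lasso
#(alpha = 1) sets most of them exactly to zero at the same lambda.
library(glmnet)
set.seed(1)
x.sim <- matrix(rnorm(100 * 10), 100, 10)
y.sim <- x.sim[, 1] - 2 * x.sim[, 2] + rnorm(100)
coef(glmnet(x.sim, y.sim, alpha = 0), s = 1)  #ridge: every coefficient non-zero
coef(glmnet(x.sim, y.sim, alpha = 1), s = 1)  #lasso: most coefficients exactly zero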
#Start Question9
library(ISLR)
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1
library(pls)
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
#(A.)
set.seed(11)
train = sample(1:dim(College)[1], dim(College)[1] / 2)
test <- -train
College.train <- College[train, ]
College.test <- College[test, ]
#(B.)
fit.lm <- lm(Apps ~ ., data = College.train)
pred.lm <- predict(fit.lm, College.test)
mean((pred.lm - College.test$Apps)^2)
## [1] 1026096
#The test MSE for least squares is 1,026,096.
#(C.)
train.mat <- model.matrix(Apps ~ ., data = College.train)
test.mat <- model.matrix(Apps ~ ., data = College.test)
grid <- 10 ^ seq(4, -2, length = 100)
fit.ridge <- glmnet(train.mat, College.train$Apps, alpha = 0, lambda = grid, thresh = 1e-12)
cv.ridge <- cv.glmnet(train.mat, College.train$Apps, alpha = 0, lambda = grid, thresh = 1e-12)
bestlam.ridge <- cv.ridge$lambda.min
bestlam.ridge
## [1] 0.01
pred.ridge <- predict(fit.ridge, s = bestlam.ridge, newx = test.mat)
mean((pred.ridge - College.test$Apps)^2)
## [1] 1026069
#The test MSE for ridge regression is 1,026,069, essentially identical to least squares. Note that the chosen lambda (0.01) is the smallest value on the grid, so cross-validation is effectively selecting almost no shrinkage here.
#(D.)
fit.lasso <- glmnet(train.mat, College.train$Apps, alpha = 1, lambda = grid, thresh = 1e-12)
cv.lasso <- cv.glmnet(train.mat, College.train$Apps, alpha = 1, lambda = grid, thresh = 1e-12)
bestlam.lasso <- cv.lasso$lambda.min
bestlam.lasso
## [1] 0.01
pred.lasso <- predict(fit.lasso, s = bestlam.lasso, newx = test.mat)
mean((pred.lasso - College.test$Apps)^2)
## [1] 1026036
predict(fit.lasso, s = bestlam.lasso, type = "coefficients")
## 19 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 37.86520037
## (Intercept) .
## PrivateYes -551.14946609
## Accept 1.74980812
## Enroll -1.36005786
## Top10perc 65.55655577
## Top25perc -22.52640339
## F.Undergrad 0.10181853
## P.Undergrad 0.01789131
## Outstate -0.08706371
## Room.Board 0.15384585
## Books -0.12227313
## Personal 0.16194591
## PhD -14.29638634
## Terminal -1.03118224
## S.F.Ratio 4.47956819
## perc.alumni -0.45456280
## Expend 0.05618050
## Grad.Rate 9.07242834
#The test MSE for the lasso is 1,026,036, again essentially identical to least squares. At this small lambda the lasso keeps every predictor; no coefficient is driven to zero.
#(E.)
fit.pcr <- pcr(Apps ~ ., data = College.train, scale = TRUE, validation = "CV")
validationplot(fit.pcr, val.type = "MSEP")
pred.pcr <- predict(fit.pcr, College.test, ncomp = 10)
mean((pred.pcr - College.test$Apps)^2)
## [1] 1867486
#The test MSE for PCR with 10 components is 1,867,486, noticeably higher than the other methods.
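#Instead of choosing the number of components by eye from the validation plot, the
#pls package's selectNcomp() can pick it automatically; a sketch (the one-sigma
#rule may choose a different ncomp than the 10 used above):
selectNcomp(fit.pcr, method = "onesigma", plot = TRUE)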
#(F.)
fit.pls <- plsr(Apps ~ ., data = College.train, scale = TRUE, validation = "CV")
validationplot(fit.pls, val.type = "MSEP")
pred.pls <- predict(fit.pls, College.test, ncomp = 10)
mean((pred.pls - College.test$Apps)^2)
## [1] 1031287
#The test MSE for PLS with 10 components is 1,031,287, close to least squares.
#(G.)
test.avg <- mean(College.test$Apps)
lm.r2 <- 1 - mean((pred.lm - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
ridge.r2 <- 1 - mean((pred.ridge - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
lasso.r2 <- 1 - mean((pred.lasso - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
pcr.r2 <- 1 - mean((pred.pcr - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
pls.r2 <- 1 - mean((pred.pls - College.test$Apps)^2) / mean((test.avg - College.test$Apps)^2)
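#Display the test R-squared values side by side. Because all five share the same
#denominator, their ordering follows the test MSEs above: least squares, ridge,
#the lasso, and PLS are essentially tied, while PCR is noticeably lower.
c(lm = lm.r2, ridge = ridge.r2, lasso = lasso.r2, pcr = pcr.r2, pls = pls.r2)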
#End Question9
#Start Question11
library(MASS)
library(leaps)
library(glmnet)
library(pls)
#(A.)
#Best Subset Selection:
#Prediction helper for regsubsets objects: rebuild the model matrix for the new
#data from the formula stored in the call, then multiply by the coefficients of
#the id-th model.
predict.regsubsets = function(object, newdata, id, ...) {
  form = as.formula(object$call[[2]])
  mat = model.matrix(form, newdata)
  coefi = coef(object, id = id)
  mat[, names(coefi)] %*% coefi
}
k = 10
p = ncol(Boston) - 1
folds = sample(rep(1:k, length = nrow(Boston)))
cv.errors = matrix(NA, k, p)
for (i in 1:k) {
  best.fit = regsubsets(crim ~ ., data = Boston[folds != i, ], nvmax = p)
  for (j in 1:p) {
    pred = predict(best.fit, Boston[folds == i, ], id = j)
    cv.errors[i, j] = mean((Boston$crim[folds == i] - pred)^2)
  }
}
rmse.cv = sqrt(apply(cv.errors, 2, mean))
plot(rmse.cv, pch = 19, type = "b")
which.min(rmse.cv)
## [1] 12
rmse.cv[which.min(rmse.cv)]
## [1] 6.54371
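#To see which predictor the 12-variable model drops, one could refit best subset
#selection on the full data set and inspect the selected coefficients (a sketch,
#not run above):
coef(regsubsets(crim ~ ., data = Boston, nvmax = p), id = 12)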
#The lasso, which can give a sparse model:
x = model.matrix(crim ~ . - 1, data = Boston)
y = Boston$crim
cv.lasso = cv.glmnet(x, y, type.measure = "mse")
plot(cv.lasso)
coef(cv.lasso)  #coefficients at lambda.1se, the default for cv.glmnet objects
## 14 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 1.0894283
## zn .
## indus .
## chas .
## nox .
## rm .
## age .
## dis .
## rad 0.2643196
## tax .
## ptratio .
## black .
## lstat .
## medv .
sqrt(cv.lasso$cvm[cv.lasso$lambda == cv.lasso$lambda.1se])  #CV RMSE at lambda.1se
## [1] 7.437989
#Ridge Regression
x = model.matrix(crim ~ . - 1, data = Boston)
y = Boston$crim
cv.ridge = cv.glmnet(x, y, type.measure = "mse", alpha = 0)
plot(cv.ridge)
coef(cv.ridge)  #coefficients at lambda.1se
## 14 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 1.523899548
## zn -0.002949852
## indus 0.029276741
## chas -0.166526006
## nox 1.874769661
## rm -0.142852604
## age 0.006207995
## dis -0.094547258
## rad 0.045932737
## tax 0.002086668
## ptratio 0.071258052
## black -0.002605281
## lstat 0.035745604
## medv -0.023480540
sqrt(cv.ridge$cvm[cv.ridge$lambda == cv.ridge$lambda.1se])  #CV RMSE at lambda.1se
## [1] 7.665736
#PCR
pcr.fit = pcr(crim ~ ., data = Boston, scale = TRUE, validation = "CV")
summary(pcr.fit)
## Data: X dimension: 506 13
## Y dimension: 506 1
## Fit method: svdpc
## Number of components considered: 13
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 8.61 7.201 7.204 6.765 6.774 6.775 6.818
## adjCV 8.61 7.198 7.201 6.761 6.767 6.770 6.810
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 6.804 6.664 6.691 6.692 6.689 6.658 6.586
## adjCV 6.796 6.656 6.682 6.682 6.678 6.646 6.574
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 47.70 60.36 69.67 76.45 82.99 88.00 91.14 93.45
## crim 30.69 30.87 39.27 39.61 39.61 39.86 40.14 42.47
## 9 comps 10 comps 11 comps 12 comps 13 comps
## X 95.40 97.04 98.46 99.52 100.0
## crim 42.55 42.78 43.04 44.13 45.4
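#Gathering the cross-validated RMSEs computed above for a side-by-side look (PCR's
#13-component CV RMSE, about 6.59, is read off the summary rather than recomputed):
c(best.subset = rmse.cv[which.min(rmse.cv)],
  lasso = sqrt(cv.lasso$cvm[cv.lasso$lambda == cv.lasso$lambda.1se]),
  ridge = sqrt(cv.ridge$cvm[cv.ridge$lambda == cv.ridge$lambda.1se]))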
#(B.) & (C.)
#I would choose the best subset model: cross-validation selects the 12-predictor model with the lowest RMSE (about 6.54), which edges out 13-component PCR (about 6.59) and clearly beats the lasso (about 7.44) and ridge (about 7.67), while remaining a plain linear model. This chosen model does not involve all of the features; it drops one of the 13 predictors.
#End Question11