library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
library(MASS)
library(leaps)
## Warning: package 'leaps' was built under R version 4.0.3
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.0.4
## Loading required package: Matrix
## Loaded glmnet 4.1-1
library(pls)
## Warning: package 'pls' was built under R version 4.0.4
## 
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
## 
##     loadings
library(caret)
## Warning: package 'caret' was built under R version 4.0.3
## Loading required package: lattice
## Loading required package: ggplot2
## 
## Attaching package: 'caret'
## The following object is masked from 'package:pls':
## 
##     R2

## Question 2

2A. The lasso, relative to least squares, is less flexible and hence will give improved prediction accuracy when its increase in bias is less than its decrease in variance.

2B. Ridge regression, relative to least squares, is less flexible and hence will give improved prediction accuracy when its increase in bias is less than its decrease in variance.

2C. Non-linear methods, relative to least squares, are more flexible and hence will give improved prediction accuracy when their increase in variance is less than their decrease in bias.
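To make the trade-off in 2A and 2B concrete, here is a minimal sketch (not part of the assignment output) using glmnet on simulated data with many weak predictors; a cross-validated ridge penalty typically gives a lower test MSE than essentially unpenalized least squares because the variance reduction outweighs the added bias.

set.seed(1)
n = 60; p = 40
x = matrix(rnorm(n * p), n, p)
beta = c(rep(1, 5), rep(0, p - 5))
y = drop(x %*% beta + rnorm(n))
x.new = matrix(rnorm(n * p), n, p)
y.new = drop(x.new %*% beta + rnorm(n))
ls.fit = glmnet(x, y, alpha = 0, lambda = 0)   # essentially least squares (no penalty)
cv.ridge.sim = cv.glmnet(x, y, alpha = 0)      # shrinkage amount chosen by CV
mean((y.new - predict(ls.fit, newx = x.new))^2)                          # least squares test MSE
mean((y.new - predict(cv.ridge.sim, newx = x.new, s = "lambda.min"))^2)  # usually smaller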

## Question 9

data("College")

9A. Split the data into training and test sets.

set.seed(8)
train=sample(1:nrow(College), nrow(College)/2)
test = (-train)
College.train = College[train,]
College.test = College[test,]
#set.seed(8)
#inTrain = createDataPartition(y=College$Apps, p=.8, list = FALSE)

#College.train = College[inTrain,]
#College.test = College[-inTrain,]

Set up the 10-fold cross-validation control.

train_control = trainControl(method = "cv", number = 10, savePredictions = "all")

9B. Linear Regression

set.seed(8)
lm.fit = train(Apps ~., data = College.train, trControl = train_control, method = "lm")
summary(lm.fit)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3182.1  -428.9   -20.4   297.3  6907.2 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.716e+02  5.242e+02  -0.709  0.47885    
## PrivateYes  -8.712e+02  1.864e+02  -4.674 4.15e-06 ***
## Accept       1.289e+00  6.700e-02  19.230  < 2e-16 ***
## Enroll      -2.028e-01  2.515e-01  -0.806  0.42057    
## Top10perc    3.815e+01  7.568e+00   5.041 7.27e-07 ***
## Top25perc   -1.095e+01  6.102e+00  -1.794  0.07363 .  
## F.Undergrad  2.368e-02  4.188e-02   0.565  0.57217    
## P.Undergrad  1.452e-03  4.922e-02   0.030  0.97648    
## Outstate    -8.025e-02  2.613e-02  -3.071  0.00229 ** 
## Room.Board   2.104e-01  6.632e-02   3.173  0.00164 ** 
## Books        1.259e-01  2.795e-01   0.450  0.65265    
## Personal     2.873e-02  8.217e-02   0.350  0.72682    
## PhD         -3.195e+00  6.090e+00  -0.525  0.60017    
## Terminal    -9.109e+00  7.208e+00  -1.264  0.20714    
## S.F.Ratio    1.306e+01  1.851e+01   0.705  0.48108    
## perc.alumni  4.301e+00  5.877e+00   0.732  0.46468    
## Expend       1.084e-01  1.823e-02   5.948 6.29e-09 ***
## Grad.Rate    6.713e+00  3.792e+00   1.770  0.07749 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 980.2 on 370 degrees of freedom
## Multiple R-squared:  0.9286, Adjusted R-squared:  0.9254 
## F-statistic: 283.3 on 17 and 370 DF,  p-value: < 2.2e-16
lm.fit
## Linear Regression 
## 
## 388 samples
##  17 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 349, 348, 350, 348, 349, 351, ... 
## Resampling results:
## 
##   RMSE     Rsquared   MAE     
##   992.857  0.9212021  608.8517
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
lin.preds = predict(lm.fit, newdata=College.test)
linear.mean.err = mean((College.test$Apps - lin.preds)^2)
linear.mean.err
## [1] 1469722
summary(lm.fit)$r.squared
## [1] 0.9286493
lin.RMSE = RMSE(lin.preds, College.test$Apps)
lin.RMSE
## [1] 1212.321
lin.test.r2 = R2(lin.preds, College.test$Apps)
lin.test.r2
## [1] 0.9260995

9C. Ridge Regression

x_train = model.matrix(Apps~., College.train)[,-1]

y_train = College.train$Apps


x_test = model.matrix(Apps~., College.test)[,-1]

y_test = College.test$Apps
grid=10^seq(4,-2,length=100)
set.seed(8)

cv_ridge = cv.glmnet(x_train, y_train, alpha = 0, lambda = grid , thresh = 1e-12) 

optimal_lambda = cv_ridge$lambda.min
ridge.fit = glmnet(x_train, y_train, alpha = 0, lambda = optimal_lambda)
coef(ridge.fit)
## 18 x 1 sparse Matrix of class "dgCMatrix"
##                        s0
## (Intercept) -4.539741e+02
## PrivateYes  -8.939414e+02
## Accept       1.182170e+00
## Enroll       5.656325e-02
## Top10perc    3.524110e+01
## Top25perc   -8.931204e+00
## F.Undergrad  1.997640e-02
## P.Undergrad  1.024211e-03
## Outstate    -6.842933e-02
## Room.Board   2.117602e-01
## Books        1.463373e-01
## Personal     1.892829e-02
## PhD         -2.829440e+00
## Terminal    -8.907760e+00
## S.F.Ratio    1.016648e+01
## perc.alumni  2.019977e+00
## Expend       1.057877e-01
## Grad.Rate    7.280783e+00
ridge.preds = predict(ridge.fit, s = optimal_lambda, newx = x_test)

ridge.mean.error = mean((College.test$Apps - ridge.preds)^2)

ridge.mean.error
## [1] 1596463
ridge.RMSE =  RMSE(ridge.preds, College.test$Apps)
ridge.RMSE
## [1] 1263.512
ridge.test.rsquared = R2(ridge.preds, College.test$Apps)
ridge.test.rsquared
##              1
## [1,] 0.9223008

9D. Lasso Regression

set.seed(8)

cv_lasso = cv.glmnet(x_train, y_train, alpha = 1, lambda = grid , thresh = 1e-12) 

optimal_lambda_lasso = cv_lasso$lambda.min
lasso.fit = glmnet(x_train, y_train, alpha = 1, lambda = optimal_lambda_lasso) # alpha = 1 gives the lasso penalty
coef(lasso.fit)
## 18 x 1 sparse Matrix of class "dgCMatrix"
##                        s0
## (Intercept) -4.446305e+02
## PrivateYes  -8.916340e+02
## Accept       1.193812e+00
## Enroll       3.124709e-02
## Top10perc    3.556814e+01
## Top25perc   -9.160756e+00
## F.Undergrad  1.976540e-02
## P.Undergrad  1.223561e-03
## Outstate    -6.979570e-02
## Room.Board   2.117188e-01
## Books        1.440272e-01
## Personal     1.993674e-02
## PhD         -2.867270e+00
## Terminal    -8.934412e+00
## S.F.Ratio    1.049127e+01
## perc.alumni  2.270297e+00
## Expend       1.061105e-01
## Grad.Rate    7.217923e+00
lasso.preds = predict(lasso.fit, s = optimal_lambda_lasso, newx = x_test)

lasso.mean.error = mean((College.test$Apps - lasso.preds)^2)

lasso.mean.error
## [1] 1581287
lasso.RMSE =  RMSE(lasso.preds, College.test$Apps)
lasso.RMSE
## [1] 1257.492
lasso.test.rsquared = R2(lasso.preds, College.test$Apps)
lasso.test.rsquared
##              1
## [1,] 0.9227781

9E. PCR

set.seed(8)

pcr.fit = train(Apps ~., data=College.train, scale = TRUE, trControl = train_control, tuneLength=ncol(College.train), method = 'pcr')
plot(pcr.fit)

pcr.fit$bestTune
##    ncomp
## 16    16
print(pcr.fit)
## Principal Component Analysis 
## 
## 388 samples
##  17 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 349, 348, 350, 348, 349, 351, ... 
## Resampling results across tuning parameters:
## 
##   ncomp  RMSE      Rsquared    MAE      
##    1     3491.496  0.02142928  2508.1309
##    2     1702.096  0.78319854  1243.8442
##    3     1712.234  0.78177302  1249.3233
##    4     1401.340  0.84817174   980.3353
##    5     1285.233  0.86588277   869.0904
##    6     1272.717  0.86980666   867.7251
##    7     1279.593  0.86916256   871.0263
##    8     1267.624  0.87124919   854.5579
##    9     1195.609  0.88696663   786.2901
##   10     1180.477  0.89018765   761.6441
##   11     1183.515  0.89032063   769.8830
##   12     1192.774  0.88900073   776.8107
##   13     1203.892  0.88712197   785.0383
##   14     1208.306  0.88636749   792.8389
##   15     1213.532  0.88508601   795.0885
##   16     1005.375  0.92093882   627.9100
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was ncomp = 16.
summary(pcr.fit$finalModel)
## Data:    X dimension: 388 17 
##  Y dimension: 388 1
## Fit method: svdpc
## Number of components considered: 16
## TRAINING: % variance explained
##           1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps
## X          31.147    57.16    64.05    70.31    75.95    81.19    84.85
## .outcome    0.121    77.52    77.57    84.78    87.29    87.30    87.31
##           8 comps  9 comps  10 comps  11 comps  12 comps  13 comps  14 comps
## X           88.14    91.11     93.47     95.39     97.10     98.15     98.96
## .outcome    87.47    89.00     89.18     89.36     89.39     89.40     89.44
##           15 comps  16 comps
## X            99.44     99.86
## .outcome     89.48     92.65
pcr.pred=predict(pcr.fit,College.test,ncomp=16)
pcr.mean.err = mean((College.test$Apps - pcr.pred)^2)
pcr.mean.err
## [1] 1647232
pcr.RMSE =  RMSE(pcr.pred, College.test$Apps)
pcr.RMSE
## [1] 1283.445
pcr.test.rsquared = R2(pcr.pred, College.test$Apps)
pcr.test.rsquared
## [1] 0.918927

9F. PLS

set.seed(8)

pls.fit = train(Apps ~., data=College.train, scale = TRUE, trControl = train_control, tuneLength=ncol(College.train), method = 'pls')
plot(pls.fit)

pls.fit$bestTune
##    ncomp
## 10    10
summary(pls.fit$finalModel)
## Data:    X dimension: 388 17 
##  Y dimension: 388 1
## Fit method: oscorespls
## Number of components considered: 10
## TRAINING: % variance explained
##           1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps
## X           25.88    32.45    61.51    66.05    69.11    72.54    76.17
## .outcome    82.16    89.39    89.73    90.98    92.10    92.71    92.78
##           8 comps  9 comps  10 comps
## X           81.21    83.34     85.64
## .outcome    92.80    92.82     92.84
pls.fit
## Partial Least Squares 
## 
## 388 samples
##  17 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 349, 348, 350, 348, 349, 351, ... 
## Resampling results across tuning parameters:
## 
##   ncomp  RMSE       Rsquared   MAE      
##    1     1518.6058  0.8254592  1103.6556
##    2     1189.7147  0.8883682   771.5916
##    3     1164.2923  0.8930979   758.1285
##    4     1128.5224  0.8996012   726.8939
##    5     1085.3298  0.9083062   698.7793
##    6     1023.9233  0.9173131   646.3927
##    7     1013.6656  0.9194031   631.5614
##    8     1000.8815  0.9210369   621.1063
##    9      989.5432  0.9225253   611.3539
##   10      988.6935  0.9223078   609.8801
##   11      992.6419  0.9214769   608.5388
##   12      993.3988  0.9211094   610.1710
##   13      992.3213  0.9213370   608.3659
##   14      992.4023  0.9212801   608.5394
##   15      992.4649  0.9212577   608.8236
##   16      992.8481  0.9212052   608.8398
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was ncomp = 10.
set.seed(8)
pls.pred=predict(pls.fit,College.test,ncomp=10)
pls.mean.err =mean((College.test$Apps - pls.pred)^2)
pls.mean.err
## [1] 1495875
pls.RMSE = RMSE(pls.pred, College.test$Apps)
pls.RMSE
## [1] 1223.06
pls.test.rsquared = R2(pls.pred, College.test$Apps)
pls.test.rsquared
## [1] 0.9251608

9G.

# Creating the data frame for comparison


data.frame(method = c("OLS", "Ridge", "Lasso", "PCR", "PLS"), 
           MSE = c(linear.mean.err, ridge.mean.error, lasso.mean.error, pcr.mean.err, pls.mean.err), 
           RMSE = c(lin.RMSE, ridge.RMSE, lasso.RMSE, pcr.RMSE, pls.RMSE),
           test.rsquared = c(lin.test.r2, ridge.test.rsquared, lasso.test.rsquared, pcr.test.rsquared, pls.test.rsquared))
##   method     MSE     RMSE test.rsquared
## 1    OLS 1469722 1212.321     0.9260995
## 2  Ridge 1596463 1263.512     0.9223008
## 3  Lasso 1581287 1257.492     0.9227781
## 4    PCR 1647232 1283.445     0.9189270
## 5    PLS 1495875 1223.060     0.9251608

The methods produce slightly different mean squared error, root mean squared error, and R-squared values; however, the differences are very small. OLS and PLS have the highest test R-squared and the lowest MSE and RMSE, while PCR has the highest MSE and RMSE and the lowest test R-squared. All of the models seem to have good predictive power.
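As a rough check on that last claim (not part of the original analysis), the test RMSE can be compared with the spread of Apps in the test set; a ratio well below 1 means the predictions capture most of the variation in the response.

sd(College.test$Apps)             # spread of the response in the test set
lin.RMSE / sd(College.test$Apps)  # OLS test RMSE as a fraction of that spread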

## Question 11

data("Boston")
set.seed(8)
train=sample(1:nrow(Boston), nrow(Boston)/2)
test = (-train)
Boston.train = Boston[train,]
Boston.test = Boston[test,]

11A.

train_control_b = trainControl(method = "cv", number = 10, savePredictions = "all")

Linear Regression

set.seed(8)
lm.fit.bost = train(crim ~., data = Boston.train, trControl = train_control_b, method = "lm")
summary(lm.fit.bost)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.762 -2.149 -0.289  1.345 52.846 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.198926   8.222216   0.632   0.5278    
## zn           0.039713   0.021802   1.822   0.0698 .  
## indus       -0.072151   0.102902  -0.701   0.4839    
## chas        -1.259336   1.356609  -0.928   0.3542    
## nox         -5.830684   5.953000  -0.979   0.3283    
## rm          -0.350889   0.720973  -0.487   0.6269    
## age         -0.016732   0.021824  -0.767   0.4440    
## dis         -0.712840   0.308824  -2.308   0.0218 *  
## rad          0.541735   0.107040   5.061 8.32e-07 ***
## tax         -0.005171   0.006233  -0.830   0.4076    
## ptratio     -0.175709   0.207705  -0.846   0.3984    
## black        0.006684   0.004204   1.590   0.1131    
## lstat        0.496459   0.099360   4.997 1.13e-06 ***
## medv        -0.016963   0.065362  -0.260   0.7955    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.047 on 239 degrees of freedom
## Multiple R-squared:  0.5736, Adjusted R-squared:  0.5504 
## F-statistic: 24.74 on 13 and 239 DF,  p-value: < 2.2e-16
lm.fit.bost
## Linear Regression 
## 
## 253 samples
##  13 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 228, 228, 229, 228, 227, 229, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   4.438062  0.6975947  2.763431
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
lin.cv.error = 4.44
lin.preds.bost = predict(lm.fit.bost, newdata=Boston.test)
linear.mean.err.bost = mean((Boston.test$crim - lin.preds.bost)^2)
linear.mean.err.bost
## [1] 65.15022
lin.RMSE.bost = RMSE(lin.preds.bost, Boston.test$crim)
lin.RMSE.bost
## [1] 8.071568
lin.test.r2.bost = R2(lin.preds.bost, Boston.test$crim)
lin.test.r2.bost
## [1] 0.2888541

Best Subset Selection

set.seed(8)
# Prediction helper for regsubsets objects (leaps provides no predict method):
# rebuild the model matrix from the formula stored in the call and multiply
# it by the coefficients of the id-variable model.
predict.regsubsets = function(object, newdata, id, ...) {
    form = as.formula(object$call[[2]])
    mat = model.matrix(form, newdata)
    coefi = coef(object, id = id)
    mat[, names(coefi)] %*% coefi
}

k = 10
p = ncol(Boston) - 1
folds = sample(rep(1:k, length = nrow(Boston)))
cv.errors = matrix(NA, k, p)
for (i in 1:k) {
    best.fit = regsubsets(crim ~ ., data = Boston[folds != i, ], nvmax = p)
    for (j in 1:p) {
        pred = predict(best.fit, Boston[folds == i, ], id = j)
        cv.errors[i, j] = mean((Boston$crim[folds == i] - pred)^2)
    }
}
rmse.cv = sqrt(apply(cv.errors, 2, mean))
plot(rmse.cv, pch = 19, type = "b")

which.min(rmse.cv)
## [1] 11
reg.bestsub =regsubsets(crim ~., data = Boston.train, nvmax=11)
summary(reg.bestsub)
## Subset selection object
## Call: regsubsets.formula(crim ~ ., data = Boston.train, nvmax = 11)
## 13 Variables  (and intercept)
##         Forced in Forced out
## zn          FALSE      FALSE
## indus       FALSE      FALSE
## chas        FALSE      FALSE
## nox         FALSE      FALSE
## rm          FALSE      FALSE
## age         FALSE      FALSE
## dis         FALSE      FALSE
## rad         FALSE      FALSE
## tax         FALSE      FALSE
## ptratio     FALSE      FALSE
## black       FALSE      FALSE
## lstat       FALSE      FALSE
## medv        FALSE      FALSE
## 1 subsets of each size up to 11
## Selection Algorithm: exhaustive
##           zn  indus chas nox rm  age dis rad tax ptratio black lstat medv
## 1  ( 1 )  " " " "   " "  " " " " " " " " "*" " " " "     " "   " "   " " 
## 2  ( 1 )  " " " "   " "  " " " " " " " " "*" " " " "     " "   "*"   " " 
## 3  ( 1 )  " " "*"   " "  " " " " " " " " "*" " " " "     " "   "*"   " " 
## 4  ( 1 )  "*" " "   " "  " " " " " " " " "*" " " " "     "*"   "*"   " " 
## 5  ( 1 )  "*" "*"   " "  " " " " " " "*" "*" " " " "     " "   "*"   " " 
## 6  ( 1 )  "*" "*"   " "  " " " " " " "*" "*" " " " "     "*"   "*"   " " 
## 7  ( 1 )  "*" "*"   " "  " " " " "*" "*" "*" " " " "     "*"   "*"   " " 
## 8  ( 1 )  "*" "*"   "*"  " " " " "*" "*" "*" " " " "     "*"   "*"   " " 
## 9  ( 1 )  "*" "*"   "*"  " " " " "*" "*" "*" "*" " "     "*"   "*"   " " 
## 10  ( 1 ) "*" " "   "*"  "*" " " "*" "*" "*" "*" "*"     "*"   "*"   " " 
## 11  ( 1 ) "*" "*"   "*"  "*" " " "*" "*" "*" "*" "*"     "*"   "*"   " "
final.bestsub.mod = train(crim ~ zn + indus + chas + nox + age + dis + rad + tax
                     + ptratio + black + lstat + medv, data=Boston.train, trControl = train_control_b, method = "lm")
summary(final.bestsub.mod)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.732 -2.166 -0.213  1.323 52.759 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.931473   6.764105   0.433   0.6651    
## zn           0.038816   0.021690   1.790   0.0748 .  
## indus       -0.067485   0.102291  -0.660   0.5101    
## chas        -1.197279   1.348455  -0.888   0.3755    
## nox         -5.843532   5.943470  -0.983   0.3265    
## age         -0.018596   0.021451  -0.867   0.3869    
## dis         -0.710742   0.308303  -2.305   0.0220 *  
## rad          0.536906   0.106410   5.046 8.93e-07 ***
## tax         -0.005042   0.006217  -0.811   0.4182    
## ptratio     -0.169050   0.206924  -0.817   0.4148    
## black        0.007009   0.004143   1.692   0.0920 .  
## lstat        0.509363   0.095605   5.328 2.29e-07 ***
## medv        -0.028481   0.060830  -0.468   0.6401    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.039 on 240 degrees of freedom
## Multiple R-squared:  0.5732, Adjusted R-squared:  0.5519 
## F-statistic: 26.86 on 12 and 240 DF,  p-value: < 2.2e-16
final.bestsub.mod
## Linear Regression 
## 
## 253 samples
##  12 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 227, 229, 228, 228, 228, 228, ... 
## Resampling results:
## 
##   RMSE      Rsquared   MAE     
##   4.321811  0.6867314  2.745647
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE
bestsub.cv.error = 4.32
bestsub.preds = predict(final.bestsub.mod, newdata=Boston.test)
bestsub.mean.err.bost = mean((Boston.test$crim - bestsub.preds)^2)
bestsub.mean.err.bost
## [1] 64.93419
bestsub.RMSE.bost = RMSE(bestsub.preds, Boston.test$crim)
bestsub.RMSE.bost
## [1] 8.058175
bestsub.test.r2.bost = R2(bestsub.preds, Boston.test$crim)
bestsub.test.r2.bost
## [1] 0.2909503

Lasso Regression

x_train_bost = model.matrix(crim~., Boston.train)[,-1]

y_train_bost = Boston.train$crim


x_test_bost = model.matrix(crim~., Boston.test)[,-1]

y_test_bost = Boston.test$crim
lambda_vector_bost =10^seq(4,-2,length=100)
set.seed(8)
lasso.fit.bost = train(y = y_train_bost, x = x_train_bost, method = 'glmnet', trControl = 
                          train_control_b, tuneGrid = expand.grid(alpha = 1, lambda = lambda_vector_bost))
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.
lasso.fit.bost$bestTune
##    alpha    lambda
## 31     1 0.6579332
lasso.fit.bost
## glmnet 
## 
## 253 samples
##  13 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 228, 228, 229, 228, 227, 229, ... 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE      Rsquared   MAE     
##   1.000000e-02  4.422344  0.6985197  2.742232
##   1.149757e-02  4.420325  0.6986081  2.739580
##   1.321941e-02  4.417816  0.6987371  2.736668
##   1.519911e-02  4.415230  0.6988474  2.733394
##   1.747528e-02  4.412147  0.6989833  2.729677
##   2.009233e-02  4.408714  0.6991221  2.725520
##   2.310130e-02  4.404949  0.6992440  2.720603
##   2.656088e-02  4.400487  0.6994292  2.714125
##   3.053856e-02  4.394812  0.6997575  2.706932
##   3.511192e-02  4.388468  0.7001356  2.699243
##   4.037017e-02  4.381721  0.7004868  2.690598
##   4.641589e-02  4.374839  0.7008001  2.681677
##   5.336699e-02  4.367866  0.7011352  2.672443
##   6.135907e-02  4.360588  0.7015730  2.661462
##   7.054802e-02  4.354133  0.7018470  2.649796
##   8.111308e-02  4.348339  0.7019372  2.638154
##   9.326033e-02  4.343043  0.7018547  2.626907
##   1.072267e-01  4.339378  0.7015502  2.617123
##   1.232847e-01  4.336476  0.7011171  2.608642
##   1.417474e-01  4.330140  0.7009157  2.597100
##   1.629751e-01  4.324721  0.7007884  2.585064
##   1.873817e-01  4.323015  0.7000241  2.576049
##   2.154435e-01  4.320640  0.6992298  2.564479
##   2.477076e-01  4.317059  0.6984895  2.553826
##   2.848036e-01  4.309811  0.6980113  2.543964
##   3.274549e-01  4.301612  0.6974706  2.533287
##   3.764936e-01  4.298648  0.6967141  2.523524
##   4.328761e-01  4.293943  0.6964743  2.506174
##   4.977024e-01  4.289551  0.6964415  2.485400
##   5.722368e-01  4.286248  0.6964021  2.461835
##   6.579332e-01  4.284753  0.6963539  2.436510
##   7.564633e-01  4.286129  0.6962900  2.409908
##   8.697490e-01  4.291856  0.6962075  2.382722
##   1.000000e+00  4.303982  0.6960981  2.355621
##   1.149757e+00  4.325332  0.6959543  2.330952
##   1.321941e+00  4.359722  0.6957562  2.315282
##   1.519911e+00  4.412213  0.6954744  2.321582
##   1.747528e+00  4.489335  0.6950589  2.359593
##   2.009233e+00  4.599199  0.6944083  2.440036
##   2.310130e+00  4.751443  0.6933171  2.579428
##   2.656088e+00  4.956788  0.6913222  2.784784
##   3.053856e+00  5.226624  0.6871211  3.046185
##   3.511192e+00  5.571396  0.6768657  3.364104
##   4.037017e+00  5.985514  0.6662700  3.747078
##   4.641589e+00  6.479141  0.6355898  4.222106
##   5.336699e+00  6.810710        NaN  4.562012
##   6.135907e+00  6.810710        NaN  4.562012
##   7.054802e+00  6.810710        NaN  4.562012
##   8.111308e+00  6.810710        NaN  4.562012
##   9.326033e+00  6.810710        NaN  4.562012
##   1.072267e+01  6.810710        NaN  4.562012
##   1.232847e+01  6.810710        NaN  4.562012
##   1.417474e+01  6.810710        NaN  4.562012
##   1.629751e+01  6.810710        NaN  4.562012
##   1.873817e+01  6.810710        NaN  4.562012
##   2.154435e+01  6.810710        NaN  4.562012
##   2.477076e+01  6.810710        NaN  4.562012
##   2.848036e+01  6.810710        NaN  4.562012
##   3.274549e+01  6.810710        NaN  4.562012
##   3.764936e+01  6.810710        NaN  4.562012
##   4.328761e+01  6.810710        NaN  4.562012
##   4.977024e+01  6.810710        NaN  4.562012
##   5.722368e+01  6.810710        NaN  4.562012
##   6.579332e+01  6.810710        NaN  4.562012
##   7.564633e+01  6.810710        NaN  4.562012
##   8.697490e+01  6.810710        NaN  4.562012
##   1.000000e+02  6.810710        NaN  4.562012
##   1.149757e+02  6.810710        NaN  4.562012
##   1.321941e+02  6.810710        NaN  4.562012
##   1.519911e+02  6.810710        NaN  4.562012
##   1.747528e+02  6.810710        NaN  4.562012
##   2.009233e+02  6.810710        NaN  4.562012
##   2.310130e+02  6.810710        NaN  4.562012
##   2.656088e+02  6.810710        NaN  4.562012
##   3.053856e+02  6.810710        NaN  4.562012
##   3.511192e+02  6.810710        NaN  4.562012
##   4.037017e+02  6.810710        NaN  4.562012
##   4.641589e+02  6.810710        NaN  4.562012
##   5.336699e+02  6.810710        NaN  4.562012
##   6.135907e+02  6.810710        NaN  4.562012
##   7.054802e+02  6.810710        NaN  4.562012
##   8.111308e+02  6.810710        NaN  4.562012
##   9.326033e+02  6.810710        NaN  4.562012
##   1.072267e+03  6.810710        NaN  4.562012
##   1.232847e+03  6.810710        NaN  4.562012
##   1.417474e+03  6.810710        NaN  4.562012
##   1.629751e+03  6.810710        NaN  4.562012
##   1.873817e+03  6.810710        NaN  4.562012
##   2.154435e+03  6.810710        NaN  4.562012
##   2.477076e+03  6.810710        NaN  4.562012
##   2.848036e+03  6.810710        NaN  4.562012
##   3.274549e+03  6.810710        NaN  4.562012
##   3.764936e+03  6.810710        NaN  4.562012
##   4.328761e+03  6.810710        NaN  4.562012
##   4.977024e+03  6.810710        NaN  4.562012
##   5.722368e+03  6.810710        NaN  4.562012
##   6.579332e+03  6.810710        NaN  4.562012
##   7.564633e+03  6.810710        NaN  4.562012
##   8.697490e+03  6.810710        NaN  4.562012
##   1.000000e+04  6.810710        NaN  4.562012
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.6579332.
lasso.cv.error = 4.28
coef(lasso.fit.bost$finalModel, lasso.fit.bost$bestTune$lambda)
## 14 x 1 sparse Matrix of class "dgCMatrix"
##                      1
## (Intercept) -4.0639552
## zn           .        
## indus        .        
## chas         .        
## nox          .        
## rm           .        
## age          .        
## dis          .        
## rad          0.3364142
## tax          .        
## ptratio      .        
## black        .        
## lstat        0.3536487
## medv         .
final.lasso.bost = glmnet(x_train_bost, y_train_bost, alpha = 1, lambda = 0.6579332)
lasso.preds.bost = predict(final.lasso.bost, newx = x_test_bost)

lasso.mean.error.bost = mean((Boston.test$crim - lasso.preds.bost)^2)

lasso.mean.error.bost
## [1] 63.23433
lasso.RMSE.bost = RMSE(lasso.preds.bost, Boston.test$crim)
lasso.RMSE.bost
## [1] 7.952001
lasso.test.r2.bost = R2(lasso.preds.bost, Boston.test$crim)
lasso.test.r2.bost
##             s0
## [1,] 0.3127097

Ridge Regression

set.seed(8)
ridge.fit.bost = train(y = y_train_bost, x = x_train_bost, method = 'glmnet', trControl = 
                          train_control_b, tuneGrid = expand.grid(alpha = 0, lambda = lambda_vector_bost))
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, :
## There were missing values in resampled performance measures.
ridge.fit.bost$bestTune
##    alpha lambda
## 34     0      1
ridge.fit.bost
## glmnet 
## 
## 253 samples
##  13 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 228, 228, 229, 228, 227, 229, ... 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE      Rsquared   MAE     
##   1.000000e-02  4.344479  0.6975261  2.632751
##   1.149757e-02  4.344479  0.6975261  2.632751
##   1.321941e-02  4.344479  0.6975261  2.632751
##   1.519911e-02  4.344479  0.6975261  2.632751
##   1.747528e-02  4.344479  0.6975261  2.632751
##   2.009233e-02  4.344479  0.6975261  2.632751
##   2.310130e-02  4.344479  0.6975261  2.632751
##   2.656088e-02  4.344479  0.6975261  2.632751
##   3.053856e-02  4.344479  0.6975261  2.632751
##   3.511192e-02  4.344479  0.6975261  2.632751
##   4.037017e-02  4.344479  0.6975261  2.632751
##   4.641589e-02  4.344479  0.6975261  2.632751
##   5.336699e-02  4.344479  0.6975261  2.632751
##   6.135907e-02  4.344479  0.6975261  2.632751
##   7.054802e-02  4.344479  0.6975261  2.632751
##   8.111308e-02  4.344479  0.6975261  2.632751
##   9.326033e-02  4.344479  0.6975261  2.632751
##   1.072267e-01  4.344479  0.6975261  2.632751
##   1.232847e-01  4.344479  0.6975261  2.632751
##   1.417474e-01  4.344479  0.6975261  2.632751
##   1.629751e-01  4.344479  0.6975261  2.632751
##   1.873817e-01  4.344479  0.6975261  2.632751
##   2.154435e-01  4.344479  0.6975261  2.632751
##   2.477076e-01  4.344479  0.6975261  2.632751
##   2.848036e-01  4.344479  0.6975261  2.632751
##   3.274549e-01  4.344479  0.6975261  2.632751
##   3.764936e-01  4.344479  0.6975261  2.632751
##   4.328761e-01  4.344479  0.6975261  2.632751
##   4.977024e-01  4.345708  0.6973622  2.633528
##   5.722368e-01  4.343774  0.6967349  2.627500
##   6.579332e-01  4.339961  0.6960420  2.618965
##   7.564633e-01  4.336972  0.6951816  2.611403
##   8.697490e-01  4.335010  0.6941374  2.605373
##   1.000000e+00  4.334182  0.6928902  2.598499
##   1.149757e+00  4.334907  0.6913986  2.591019
##   1.321941e+00  4.337189  0.6896537  2.583344
##   1.519911e+00  4.341268  0.6876312  2.576850
##   1.747528e+00  4.347339  0.6853212  2.571034
##   2.009233e+00  4.355508  0.6827339  2.566545
##   2.310130e+00  4.365853  0.6798327  2.562779
##   2.656088e+00  4.378385  0.6766537  2.558917
##   3.053856e+00  4.393137  0.6731942  2.558330
##   3.511192e+00  4.409984  0.6694909  2.559274
##   4.037017e+00  4.429102  0.6655266  2.560010
##   4.641589e+00  4.450329  0.6613766  2.562738
##   5.336699e+00  4.473735  0.6570282  2.565458
##   6.135907e+00  4.499253  0.6525609  2.568827
##   7.054802e+00  4.527103  0.6479668  2.574421
##   8.111308e+00  4.557164  0.6433356  2.582714
##   9.326033e+00  4.589845  0.6386601  2.593778
##   1.072267e+01  4.625164  0.6340375  2.607664
##   1.232847e+01  4.663726  0.6294335  2.624797
##   1.417474e+01  4.705482  0.6249768  2.642750
##   1.629751e+01  4.751262  0.6206181  2.662064
##   1.873817e+01  4.801203  0.6164494  2.685009
##   2.154435e+01  4.855941  0.6124407  2.717922
##   2.477076e+01  4.915464  0.6086861  2.757839
##   2.848036e+01  4.980441  0.6051213  2.802821
##   3.274549e+01  5.050474  0.6018275  2.852876
##   3.764936e+01  5.125905  0.5987394  2.914806
##   4.328761e+01  5.205817  0.5959242  2.986263
##   4.977024e+01  5.290216  0.5933078  3.066058
##   5.722368e+01  5.377614  0.5909603  3.148946
##   6.579332e+01  5.467783  0.5888058  3.234788
##   7.564633e+01  5.558858  0.5868856  3.325839
##   8.697490e+01  5.650548  0.5851363  3.420268
##   1.000000e+02  5.740851  0.5835907  3.512782
##   1.149757e+02  5.829651  0.5821918  3.602386
##   1.321941e+02  5.915072  0.5809645  3.687809
##   1.519911e+02  5.997305  0.5798593  3.769146
##   1.747528e+02  6.074772  0.5788934  3.844183
##   2.009233e+02  6.147983  0.5780285  3.914655
##   2.310130e+02  6.215730  0.5772785  3.979519
##   2.656088e+02  6.278770  0.5766078  4.039530
##   3.053856e+02  6.336244  0.5760276  4.094808
##   3.511192e+02  6.389046  0.5755099  4.146337
##   4.037017e+02  6.436605  0.5750634  4.193217
##   4.641589e+02  6.479851  0.5746658  4.235766
##   5.336699e+02  6.518427  0.5743235  4.273727
##   6.135907e+02  6.553218  0.5740192  4.308083
##   7.054802e+02  6.584014  0.5737577  4.338394
##   8.111308e+02  6.611611  0.5735254  4.365433
##   9.326033e+02  6.635892  0.5733261  4.389401
##   1.072267e+03  6.657542  0.5731492  4.410706
##   1.232847e+03  6.676502  0.5729977  4.429306
##   1.417474e+03  6.693342  0.5728648  4.445814
##   1.629751e+03  6.708036  0.5727492  4.460387
##   1.873817e+03  6.721048  0.5726467  4.473265
##   2.154435e+03  6.732370  0.5725590  4.484453
##   2.477076e+03  6.742374  0.5724813  4.494321
##   2.848036e+03  6.751060  0.5724148  4.502941
##   3.274549e+03  6.758721  0.5723560  4.510554
##   3.764936e+03  6.765362  0.5723056  4.517148
##   4.328761e+03  6.771343  0.5722611  4.523110
##   4.977024e+03  6.800911  0.6067236  4.552676
##   5.722368e+03  6.810710        NaN  4.562012
##   6.579332e+03  6.810710        NaN  4.562012
##   7.564633e+03  6.810710        NaN  4.562012
##   8.697490e+03  6.810710        NaN  4.562012
##   1.000000e+04  6.810710        NaN  4.562012
## 
## Tuning parameter 'alpha' was held constant at a value of 0
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0 and lambda = 1.
ridge.cv.error = 4.34
coef(ridge.fit.bost$finalModel, ridge.fit.bost$bestTune$lambda)
## 14 x 1 sparse Matrix of class "dgCMatrix"
##                        1
## (Intercept)  0.267894814
## zn           0.028178313
## indus       -0.073328379
## chas        -1.093209480
## nox         -1.121283654
## rm          -0.431817343
## age         -0.002141736
## dis         -0.435124734
## rad          0.306261724
## tax          0.004623799
## ptratio     -0.039136060
## black        0.002839402
## lstat        0.356944500
## medv        -0.026296716
final.ridge.bost = glmnet(x_train_bost, y_train_bost, alpha = 0, lambda = 1) # alpha = 0 gives the ridge penalty
ridge.preds.bost = predict(final.ridge.bost, newx = x_test_bost)

ridge.mean.error.bost = mean((Boston.test$crim - ridge.preds.bost)^2)

ridge.mean.error.bost
## [1] 63.79812
ridge.RMSE.bost = RMSE(ridge.preds.bost, Boston.test$crim)
ridge.RMSE.bost
## [1] 7.987373
ridge.test.r2.bost = R2(ridge.preds.bost, Boston.test$crim)
ridge.test.r2.bost
##             s0
## [1,] 0.3143458

PCR

set.seed(8)

pcr.fit.bost = train(crim ~., data=Boston.train, scale = TRUE, trControl = train_control_b, tuneLength=14, method = 'pcr')
plot(pcr.fit.bost)

pcr.fit.bost$bestTune
##   ncomp
## 9     9
print(pcr.fit.bost)
## Principal Component Analysis 
## 
## 253 samples
##  13 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 228, 228, 229, 228, 227, 229, ... 
## Resampling results across tuning parameters:
## 
##   ncomp  RMSE      Rsquared   MAE     
##    1     5.059584  0.5387187  3.215446
##    2     5.046589  0.5463093  3.281501
##    3     4.821077  0.5987237  2.866363
##    4     4.771006  0.6079421  2.811514
##    5     4.776528  0.6066792  2.811613
##    6     4.658189  0.6255412  2.865325
##    7     4.674648  0.6314719  2.975392
##    8     4.592278  0.6461243  2.850103
##    9     4.486453  0.6722929  2.847627
##   10     4.490941  0.6823706  2.864746
##   11     4.488325  0.6850498  2.857102
##   12     4.487390  0.6844183  2.813741
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was ncomp = 9.
pcr.cv.error = 4.48
summary(pcr.fit.bost$finalModel)
## Data:    X dimension: 253 13 
##  Y dimension: 253 1
## Fit method: svdpc
## Number of components considered: 9
## TRAINING: % variance explained
##           1 comps  2 comps  3 comps  4 comps  5 comps  6 comps  7 comps
## X           49.66    62.49    71.12    78.46    84.72    89.43    92.32
## .outcome    40.11    41.35    44.97    45.44    45.45    48.47    49.47
##           8 comps  9 comps
## X           94.26    96.04
## .outcome    51.14    54.19
coef(pcr.fit.bost$finalModel, pcr.fit.bost$bestTune$ncomp)
## , , 9 comps
## 
##           .outcome
## zn       0.5469047
## indus   -2.0672558
## chas    -0.2685570
## nox      0.2376067
## rm      -0.8034521
## age      0.6829360
## dis     -0.6410346
## rad      2.9174962
## tax      1.7255229
## ptratio -0.2899265
## black    0.2073022
## lstat    1.9114809
## medv    -0.2821096
pcr.pred.bost = predict(pcr.fit.bost, Boston.test, ncomp=9)
pcr.mean.err.bost = mean((Boston.test$crim - pcr.pred.bost)^2)
pcr.mean.err.bost
## [1] 62.816
pcr.RMSE.bost =  RMSE(pcr.pred.bost, Boston.test$crim)
pcr.RMSE.bost
## [1] 7.925655
pcr.test.rsquared.bost = R2(pcr.pred.bost, Boston.test$crim)
pcr.test.rsquared.bost
## [1] 0.3121689

11B.

data.frame(method = c("OLS", "Best Subset", "Lasso", "Ridge", "PCR"), 
           CV_RMSE = c(lin.cv.error, bestsub.cv.error, lasso.cv.error, ridge.cv.error, pcr.cv.error), 
           MSE = c(linear.mean.err.bost, bestsub.mean.err.bost, lasso.mean.error.bost,
                   ridge.mean.error.bost, pcr.mean.err.bost), 
           RMSE = c(lin.RMSE.bost, bestsub.RMSE.bost, lasso.RMSE.bost, ridge.RMSE.bost, pcr.RMSE.bost),
           test.rsquared = c(lin.test.r2.bost, bestsub.test.r2.bost, lasso.test.r2.bost,
                             ridge.test.r2.bost, pcr.test.rsquared.bost))
##        method CV_RMSE      MSE     RMSE test.rsquared
## 1         OLS    4.44 65.15022 8.071568     0.2888541
## 2 Best Subset    4.32 64.93419 8.058175     0.2909503
## 3       Lasso    4.28 63.23433 7.952001     0.3127097
## 4       Ridge    4.34 63.79812 7.987373     0.3143458
## 5         PCR    4.48 62.81600 7.925655     0.3121689

All of the models perform roughly the same; there is no substantial difference in performance between them. Looking at the cross-validation RMSE, the lasso model had the lowest error and PCR had the highest. Ridge has the highest test R-squared (0.3143458) and OLS has the lowest (0.2888541).
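The CV_RMSE column above was typed in by hand (e.g. lin.cv.error = 4.44). A sketch of pulling the same values directly from the caret fits instead, assuming the fitted objects created above:

sapply(list(OLS = lm.fit.bost, BestSubset = final.bestsub.mod, Lasso = lasso.fit.bost,
            Ridge = ridge.fit.bost, PCR = pcr.fit.bost),
       function(fit) min(fit$results$RMSE, na.rm = TRUE))   # smallest resampled RMSE per model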

11C.

My chosen model is ridge regression because it has the highest test R-squared. The ridge regression kept all of the variables: its penalty shrinks coefficients toward zero but never forces any of them to be exactly zero, so the final ridge model always includes every predictor.
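A quick way to see this difference is to count how many coefficients each penalty sets exactly to zero; a sketch using the objects fitted above:

sum(coef(final.lasso.bost)[-1] == 0)                                           # the lasso drops several predictors
sum(coef(ridge.fit.bost$finalModel, ridge.fit.bost$bestTune$lambda)[-1] == 0)  # ridge drops none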