Option iii
Option iii
Option ii
library(ISLR)
attach(College)
set.seed(1)
train_index = sample(1:nrow(College), round(nrow(College) * 0.70))
train = College[train_index, ]
test = College[-train_index, ]
lm.fit = lm(Apps~., data = train)
lm.pred = predict(lm.fit, test)
lm.err = mean((test$Apps - lm.pred)^2)
print(lm.err)
## [1] 1266407
Our above output shows the test error obtained from the linear model is 1266407.
library(glmnet)
set.seed(1)
xtrain = model.matrix(Apps~., data = train[,-1])
ytrain = train$Apps
xtest = model.matrix(Apps~., data = test[,-1])
ytest = test$Apps
ridge.fit = cv.glmnet(xtrain, ytrain, alpha = 0)
plot(ridge.fit)
ridge.lambda = ridge.fit$lambda.min
ridge.lambda
## [1] 367.2207
best.lam = ridge.fit$lambda.min
ridge.pred = predict(ridge.fit, s = best.lam, newx = xtest)
mean((ridge.pred - ytest)^2)
## [1] 1168672
The above output shows that our test error is 1168672.
set.seed(1)
lasso.fit = cv.glmnet(xtrain, ytrain, alpha = 1)
plot(lasso.fit)
best.lam = lasso.fit$lambda.min
best.lam
## [1] 1.959746
lasso.pred = predict(lasso.fit, s=best.lam, newx=xtest)
mean((lasso.pred - ytest)^2)
## [1] 1269434
The above output shows our test error is 1269434.
lasso.coeff = predict(lasso.fit, type='coefficients', s=best.lam)[1:18,]
lasso.coeff
## (Intercept) (Intercept) Accept Enroll Top10perc
## -1.055063e+03 0.000000e+00 1.702035e+00 -9.452336e-01 5.153330e+01
## Top25perc F.Undergrad P.Undergrad Outstate Room.Board
## -1.453161e+01 3.640165e-02 7.359690e-02 -1.072338e-01 1.455419e-01
## Books Personal PhD Terminal S.F.Ratio
## 2.606755e-01 -5.200636e-03 -6.877813e+00 6.083357e-01 2.492705e+01
## perc.alumni Expend Grad.Rate
## 6.269905e-01 6.299937e-02 6.095267e+00
lasso.coeff[lasso.coeff!=0]
## (Intercept) Accept Enroll Top10perc Top25perc
## -1.055063e+03 1.702035e+00 -9.452336e-01 5.153330e+01 -1.453161e+01
## F.Undergrad P.Undergrad Outstate Room.Board Books
## 3.640165e-02 7.359690e-02 -1.072338e-01 1.455419e-01 2.606755e-01
## Personal PhD Terminal S.F.Ratio perc.alumni
## -5.200636e-03 -6.877813e+00 6.083357e-01 2.492705e+01 6.269905e-01
## Expend Grad.Rate
## 6.299937e-02 6.095267e+00
The above output shows we have 17 non-zero coefficients.
library(pls)
set.seed(1)
pcr.fit = pcr(Apps~., data = train, scale = TRUE, validation = "CV")
summary(pcr.fit)
## Data: X dimension: 544 17
## Y dimension: 544 1
## Fit method: svdpc
## Number of components considered: 17
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 3892 3821 2139 2155 1806 1742 1719
## adjCV 3892 3821 2134 2152 1776 1717 1711
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 1724 1673 1641 1634 1638 1642 1643
## adjCV 1722 1659 1634 1627 1631 1635 1636
## 14 comps 15 comps 16 comps 17 comps
## CV 1646 1552 1179 1152
## adjCV 1639 1535 1169 1143
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 32.058 57.03 64.43 70.29 75.66 80.65 84.26 87.61
## Apps 5.575 71.65 71.65 81.07 82.59 82.61 82.69 84.06
## 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps 15 comps
## X 90.59 92.84 94.93 96.74 97.82 98.72 99.38
## Apps 84.56 84.83 84.86 84.87 85.02 85.06 89.81
## 16 comps 17 comps
## X 99.85 100.00
## Apps 93.04 93.32
validationplot(pcr.fit, val.type="MSEP")
pcr.pred = predict(pcr.fit, test, ncomp = 10)
mean((test$Apps - pcr.pred)^2)
## [1] 1842925
The above output shows cross-validation has chosen M=10 and our test error is 1842925.
set.seed(1)
pls.fit = plsr(Apps~., data = train, scale = TRUE, validation ="CV")
summary(pls.fit)
## Data: X dimension: 544 17
## Y dimension: 544 1
## Fit method: kernelpls
## Number of components considered: 17
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 3892 1959 1735 1563 1476 1246 1174
## adjCV 3892 1954 1731 1554 1451 1222 1163
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 1168 1156 1153 1154 1154 1156 1152
## adjCV 1158 1147 1144 1145 1145 1146 1143
## 14 comps 15 comps 16 comps 17 comps
## CV 1152 1152 1152 1152
## adjCV 1143 1143 1143 1143
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 25.67 47.20 62.49 64.91 67.30 72.46 76.95 80.88
## Apps 76.61 82.45 86.93 90.76 92.84 93.06 93.14 93.20
## 9 comps 10 comps 11 comps 12 comps 13 comps 14 comps 15 comps
## X 82.69 85.27 87.8 90.88 92.47 95.11 97.09
## Apps 93.26 93.28 93.3 93.31 93.32 93.32 93.32
## 16 comps 17 comps
## X 98.37 100.00
## Apps 93.32 93.32
validationplot(pls.fit, val.type = "MSEP")
pls.pred = predict(pls.fit, test, ncomp = 13)
mean((pls.pred - test$Apps)^2)
## [1] 1266751
With M=13, the above output shows our test error is 1266751.
Our test show that our test error for linear-model is 1266407, test error for ridge regression is 1168672, test error for lasso is 1269434, test error for PCR is 1842925 and test error for PLS is 1266751. While the test errors were relatively similar, our data shows that ridge regression performed most accurately.
library(MASS)
library(leaps)
attach(Boston)
set.seed(1)
train = sample(1:nrow(Boston), nrow(Boston)/2)
boston.train = Boston[train,]
test = (-train)
boston.test = Boston[test,]
crim.test = Boston$crim[test]
x.mat.train = model.matrix(crim~., data= boston.train)[,-crim]
x.mat.train = model.matrix(crim~., data=boston.test)[,-crim]
Linear model:
lm.full = lm(crim~.,data = boston.train)
lm.predict = predict(lm.full, boston.test)
mean((lm.predict-crim.test)^2)
## [1] 41.54639
Ridge Regression:
set.seed(1)
x = model.matrix(crim~.,Boston)[,-1]
y = Boston$crim
y.test = y[test]
train = sample(1:nrow(x), nrow(x)/2)
test = (-train)
grid = 10^seq(10,-2,length=100)
ridge.mod = glmnet(x,y,lambda=grid, alpha=0)
cv.out = cv.glmnet(x[train,],y[train],alpha=0)
plot(cv.out)
best.lam = cv.out$lambda.min
ridge.pred = predict(ridge.mod, s = best.lam, newx=x[test,])
mean((ridge.pred-y.test)^2)
## [1] 38.01174
Lasso:
train = sample(1:nrow(x), nrow(x)/2)
test = (-train)
grid=10^seq(10,-2,length=100)
lasso.mod = glmnet(x,y,lambda=grid, alpha=1)
cv.out=cv.glmnet(x[train,],y[train],alpha=1)
plot(cv.out)
best.lam = cv.out$lambda.min
lasso.pred = predict(lasso.mod, s = best.lam, newx=x[test,])
mean((lasso.pred-y.test)^2)
## [1] 39.67203
PCR:
set.seed(1)
pcr.fit = pcr(crim~., data=boston.train, scale=TRUE, validation='CV')
summary(pcr.fit)
## Data: X dimension: 253 13
## Y dimension: 253 1
## Fit method: svdpc
## Number of components considered: 13
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 9.275 7.555 7.549 7.093 6.926 6.932 6.977
## adjCV 9.275 7.550 7.544 7.088 6.920 6.926 6.968
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps 13 comps
## CV 6.976 6.871 6.845 6.848 6.797 6.764 6.700
## adjCV 6.967 6.859 6.843 6.842 6.786 6.751 6.686
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 48.51 60.4 69.86 77.08 82.80 87.68 91.24 93.56
## crim 34.94 35.2 42.83 45.47 45.57 45.58 45.75 47.59
## 9 comps 10 comps 11 comps 12 comps 13 comps
## X 95.47 97.08 98.48 99.54 100.00
## crim 47.68 48.75 49.31 50.14 51.37
validationplot(pcr.fit,val.type="MSEP", main='Crime')
pcr.pred = predict(pcr.fit, boston.test, ncomp = 10)
mean((pcr.pred-y.test)^2)
## [1] 43.34343
The above output shows our MSE values are 41.54639 for Linear Model, 38.01174 for Ridge Regression, 39.67203 for Lasso, and 43.34343 for PCR. From these observations, the best approach appears to be Ridge Regression.
out = glmnet(x, y, alpha = 1, lambda = grid)
ridge.coeff = predict(out, type='coefficient', s = best.lam)[1:14,]
ridge.coeff
## (Intercept) zn indus chas nox rm
## 14.476496146 0.039476650 -0.070676946 -0.643189309 -8.433633678 0.323327442
## age dis rad tax ptratio black
## 0.000000000 -0.877313853 0.539447859 -0.001159816 -0.224731922 -0.007537653
## lstat medv
## 0.126684807 -0.175267556
Our model does not include all of the features in the data set as one coefficient (age) was reduced to zero.