Best Subset Selection method, Ridge Regression, Lasso의 test MSE 값 비교
library(MASS)
library(ISLR)
library(glmnet)
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-16
str(Boston)
## 'data.frame':    506 obs. of  14 variables:
##  $ crim   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ zn     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ indus  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ chas   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ nox    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ rm     : num  6.58 6.42 7.18 7 7.15 ...
##  $ age    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ dis    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ rad    : int  1 2 2 3 3 3 5 5 5 5 ...
##  $ tax    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ ptratio: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ black  : num  397 397 393 395 397 ...
##  $ lstat  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ medv   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
attach(Boston)
sum(is.na(medv))
## [1] 0
x<-model.matrix(medv~., data=Boston)[,-1]
y<-Boston$medv

set.seed(2013122032)
train=sample(1:nrow(Boston), nrow(Boston)/2)
test=(-train)
y.test=y[test]

1. Multiple Linear Regression Model. Best Subset Selection

library(leaps)
lm.fit.full<-regsubsets(medv~crim+zn+chas+nox+rm+dis+rad+tax+ptratio+black+lstat*rm+poly(lstat,2),data=Boston, nvmax=13)
## Reordering variables and trying again:
(reg.summary<-summary(lm.fit.full))
## Subset selection object
## Call: regsubsets.formula(medv ~ crim + zn + chas + nox + rm + dis + 
##     rad + tax + ptratio + black + lstat * rm + poly(lstat, 2), 
##     data = Boston, nvmax = 13)
## 14 Variables  (and intercept)
##                 Forced in Forced out
## crim                FALSE      FALSE
## zn                  FALSE      FALSE
## chas                FALSE      FALSE
## nox                 FALSE      FALSE
## rm                  FALSE      FALSE
## dis                 FALSE      FALSE
## rad                 FALSE      FALSE
## tax                 FALSE      FALSE
## ptratio             FALSE      FALSE
## black               FALSE      FALSE
## lstat               FALSE      FALSE
## poly(lstat, 2)2     FALSE      FALSE
## rm:lstat            FALSE      FALSE
## poly(lstat, 2)1     FALSE      FALSE
## 1 subsets of each size up to 13
## Selection Algorithm: exhaustive
##           crim zn  chas nox rm  dis rad tax ptratio black lstat
## 1  ( 1 )  " "  " " " "  " " " " " " " " " " " "     " "   "*"  
## 2  ( 1 )  " "  " " " "  " " "*" " " " " " " " "     " "   " "  
## 3  ( 1 )  " "  " " " "  " " "*" " " " " " " " "     " "   " "  
## 4  ( 1 )  " "  " " " "  " " "*" " " " " " " "*"     " "   "*"  
## 5  ( 1 )  " "  " " " "  " " "*" "*" " " " " "*"     " "   "*"  
## 6  ( 1 )  "*"  " " " "  " " "*" "*" " " " " "*"     " "   " "  
## 7  ( 1 )  "*"  " " " "  "*" "*" "*" " " " " "*"     " "   " "  
## 8  ( 1 )  "*"  " " " "  "*" "*" "*" "*" " " "*"     " "   " "  
## 9  ( 1 )  "*"  " " " "  "*" "*" "*" "*" "*" "*"     " "   "*"  
## 10  ( 1 ) "*"  " " "*"  "*" "*" "*" "*" "*" "*"     " "   " "  
## 11  ( 1 ) "*"  " " "*"  "*" "*" "*" "*" "*" "*"     " "   " "  
## 12  ( 1 ) "*"  " " "*"  "*" "*" "*" "*" "*" "*"     "*"   " "  
## 13  ( 1 ) "*"  "*" "*"  "*" "*" "*" "*" "*" "*"     "*"   "*"  
##           poly(lstat, 2)1 poly(lstat, 2)2 rm:lstat
## 1  ( 1 )  " "             " "             " "     
## 2  ( 1 )  " "             " "             "*"     
## 3  ( 1 )  "*"             " "             "*"     
## 4  ( 1 )  " "             " "             "*"     
## 5  ( 1 )  " "             " "             "*"     
## 6  ( 1 )  "*"             " "             "*"     
## 7  ( 1 )  "*"             " "             "*"     
## 8  ( 1 )  "*"             " "             "*"     
## 9  ( 1 )  " "             " "             "*"     
## 10  ( 1 ) "*"             " "             "*"     
## 11  ( 1 ) "*"             "*"             "*"     
## 12  ( 1 ) "*"             "*"             "*"     
## 13  ( 1 ) " "             "*"             "*"
(bestsub.no.var<-which.min(reg.summary$cp))
## [1] 13
plot(reg.summary$cp, main="Relation between Cp and number of variables", xlab="Number of Variabes", ylab="Cp", type="l", xlim=c(0,13.5))
points(bestsub.no.var, reg.summary$cp[bestsub.no.var], col="green",cex=2, pch=20)

bestsub<-lm(medv~crim+zn+chas+nox+rm+dis+rad+tax+ptratio+black+poly(lstat,2),rm*lstat, data=Boston)
(mean((medv-predict(bestsub, data=Boston))[test]^2))
## [1] 147.9016
Boston.test=Boston[test,]
(best.subset.variance<-var(predict(bestsub, Boston.test,type="response")))
## [1] 83.0654

2. Ridge regression with Boston data

grid<-10^seq(10,-2,length=100)
ridge.mod<-glmnet(x,y,alpha=0, lambda=grid)
cv.out1<-cv.glmnet(x[train,],medv[train],alpha=0)
(bestlamb1<-cv.out1$lambda.min)
## [1] 0.7170866
ridge.mod<-glmnet(x[train,],y[train],alpha=0, lambda=grid)
ridge.pred<-predict(ridge.mod, s=bestlamb1, newx=x[test,])
out1<-glmnet(x,y, alpha=0, lambda=grid)
(ridge.coef<-predict(out1, type="coefficients",s=bestlamb1))
## 14 x 1 sparse Matrix of class "dgCMatrix"
##                         1
## (Intercept)  27.787117301
## crim         -0.087241905
## zn            0.032331960
## indus        -0.039606003
## chas          2.903460658
## nox         -11.764352214
## rm            4.011966332
## age          -0.003850166
## dis          -1.106601138
## rad           0.150455495
## tax          -0.005623005
## ptratio      -0.851483398
## black         0.009050531
## lstat        -0.469882889
(mean((ridge.pred-y.test)^2))
## [1] 30.31254
(var(predict(ridge.mod, s=bestlamb1, type="response", newx=x[test,])))
##          1
## 1 68.61045

3. The Lasso

lasso.mod<-glmnet(x[train,],y[train], alpha=1, lambda=grid)
cv.out2<-cv.glmnet(x[train,],medv[train],alpha=1)
(bestlamb2<-cv.out2$lambda.min)
## [1] 0.03916926
out2<-glmnet(x,y, alpha=1, lambda=grid)
(lasso.coef<-predict(out2, type="coefficients",s=bestlamb2))
## 14 x 1 sparse Matrix of class "dgCMatrix"
##                         1
## (Intercept)  33.682988194
## crim         -0.094680931
## zn            0.039626479
## indus         .          
## chas          2.669592508
## nox         -15.861315375
## rm            3.890988277
## age           .          
## dis          -1.356020807
## rad           0.235652236
## tax          -0.009111526
## ptratio      -0.924153390
## black         0.008928566
## lstat        -0.522530747
lasso.pred<-predict(lasso.mod, s=bestlamb2, newx<-x[test,])
(mean((lasso.pred-y.test)^2))
## [1] 29.91682
(lasso.variance<-var(predict(lasso.mod, s=bestlamb2, type="response", newx=x[test,])))
##          1
## 1 73.51259
  1. Compare Between Best Subset Selection, Ridge Regression, The Lasso
(best.subset.testMSE<-(mean((medv-predict(bestsub, data=Boston))[test]^2)))
## [1] 147.9016
(ridge.testMSE<-(mean((ridge.pred-y.test)^2)))
## [1] 30.31254
(lasso.testMSE<-(mean((lasso.pred-y.test)^2)))
## [1] 29.91682
which.max(c(best.subset.testMSE, ridge.testMSE, lasso.testMSE))
## [1] 1
(best.subset.variance<-var(predict(bestsub, Boston.test,type="response")))
## [1] 83.0654
(ridge.variance<-var(predict(ridge.mod, s=bestlamb1, type="response", newx=x[test,])))
##          1
## 1 68.61045
(lasso.variance<-var(predict(lasso.mod, s=bestlamb2, type="response", newx=x[test,])))
##          1
## 1 73.51259