library(mlbench)
data(BostonHousing)
dim(BostonHousing)
## [1] 506 14
head(BostonHousing)
##      crim zn indus chas   nox    rm  age    dis rad tax ptratio      b
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90
## 6 0.02985  0  2.18    0 0.458 6.430 58.7 6.0622   3 222    18.7 394.12
##   lstat medv
## 1  4.98 24.0
## 2  9.14 21.6
## 3  4.03 34.7
## 4  2.94 33.4
## 5  5.33 36.2
## 6  5.21 28.7
str(BostonHousing)
## 'data.frame': 506 obs. of 14 variables:
## $ crim : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ zn : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ indus : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ chas : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nox : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ rm : num 6.58 6.42 7.18 7 7.15 ...
## $ age : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ dis : num 4.09 4.97 4.97 6.06 6.06 ...
## $ rad : num 1 2 2 3 3 3 5 5 5 5 ...
## $ tax : num 296 242 242 222 222 222 311 311 311 311 ...
## $ ptratio: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ b : num 397 397 393 395 397 ...
## $ lstat : num 4.98 9.14 4.03 2.94 5.33 ...
## $ medv : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
summary(BostonHousing)
## crim zn indus chas
## Min. : 0.00632 Min. : 0.00 Min. : 0.46 0:471
## 1st Qu.: 0.08204 1st Qu.: 0.00 1st Qu.: 5.19 1: 35
## Median : 0.25651 Median : 0.00 Median : 9.69
## Mean : 3.61352 Mean : 11.36 Mean :11.14
## 3rd Qu.: 3.67708 3rd Qu.: 12.50 3rd Qu.:18.10
## Max. :88.97620 Max. :100.00 Max. :27.74
## nox rm age dis
## Min. :0.3850 Min. :3.561 Min. : 2.90 Min. : 1.130
## 1st Qu.:0.4490 1st Qu.:5.886 1st Qu.: 45.02 1st Qu.: 2.100
## Median :0.5380 Median :6.208 Median : 77.50 Median : 3.207
## Mean :0.5547 Mean :6.285 Mean : 68.57 Mean : 3.795
## 3rd Qu.:0.6240 3rd Qu.:6.623 3rd Qu.: 94.08 3rd Qu.: 5.188
## Max. :0.8710 Max. :8.780 Max. :100.00 Max. :12.127
## rad tax ptratio b
## Min. : 1.000 Min. :187.0 Min. :12.60 Min. : 0.32
## 1st Qu.: 4.000 1st Qu.:279.0 1st Qu.:17.40 1st Qu.:375.38
## Median : 5.000 Median :330.0 Median :19.05 Median :391.44
## Mean : 9.549 Mean :408.2 Mean :18.46 Mean :356.67
## 3rd Qu.:24.000 3rd Qu.:666.0 3rd Qu.:20.20 3rd Qu.:396.23
## Max. :24.000 Max. :711.0 Max. :22.00 Max. :396.90
## lstat medv
## Min. : 1.73 Min. : 5.00
## 1st Qu.: 6.95 1st Qu.:17.02
## Median :11.36 Median :21.20
## Mean :12.65 Mean :22.53
## 3rd Qu.:16.95 3rd Qu.:25.00
## Max. :37.97 Max. :50.00
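#quick completeness check (a sketch): mlbench's BostonHousing ships with no
#missing values, so no imputation is needed before modeling
sum(is.na(BostonHousing))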
library(car) #package to calculate Variance Inflation Factor
library(corrplot) #correlation plots
library(leaps) #best subsets regression
library(glmnet) #allows ridge regression, LASSO and elastic net
## Loading required package: Matrix
## Loading required package: foreach
## Loaded glmnet 2.0-10
library(caret) #parameter tuning
## Loading required package: lattice
## Loading required package: ggplot2
data("BostonHousing")
plot(BostonHousing[,-4])

plot(BostonHousing$indus)

table(BostonHousing$chas)
##
## 0 1
## 471 35
boxplot(medv~chas, data=BostonHousing, xlab="Charles River", ylab="median value ($1000s)")

Boston <- BostonHousing[,-4] #drop the chas factor; cor() needs numeric columns
p.cor = cor(Boston)
corrplot.mixed(p.cor)
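#side note (a sketch): car was loaded above for Variance Inflation Factors but
#never called; a VIF check on a full OLS fit quantifies the collinearity the
#correlation plot suggests (values above roughly 5-10 are usually flagged)
vif(lm(medv~., data=Boston))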

set.seed(123) #fix the random number generator for reproducibility
ind = sample(2, nrow(Boston), replace=TRUE, prob=c(0.7, 0.3)) #~70/30 split
train = Boston[ind==1,] #the training data set
test = Boston[ind==2,] #the test data set
str(test) #confirm it worked
## 'data.frame': 143 obs. of 13 variables:
## $ crim : num 0.0273 0.0324 0.069 0.1446 0.2249 ...
## $ zn : num 0 0 0 12.5 12.5 0 0 0 0 0 ...
## $ indus : num 7.07 2.18 2.18 7.87 7.87 8.14 8.14 8.14 8.14 8.14 ...
## $ nox : num 0.469 0.458 0.458 0.524 0.524 0.538 0.538 0.538 0.538 0.538 ...
## $ rm : num 6.42 7 7.15 6.17 6.38 ...
## $ age : num 78.9 45.8 54.2 96.1 94.3 56.5 69.5 98.1 100 85.7 ...
## $ dis : num 4.97 6.06 6.06 5.95 6.35 ...
## $ rad : num 2 3 3 5 5 4 4 4 4 4 ...
## $ tax : num 242 222 222 311 311 307 307 307 307 307 ...
## $ ptratio: num 17.8 18.7 18.7 15.2 15.2 21 21 21 21 21 ...
## $ b : num 397 395 397 397 393 ...
## $ lstat : num 9.14 2.94 5.33 19.15 20.45 ...
## $ medv : num 21.6 33.4 36.2 27.1 15 19.9 18.2 13.6 14.5 13.9 ...
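#optional (a sketch, not used below): with caret already loaded,
#createDataPartition() draws a split stratified on the outcome instead
idx = createDataPartition(Boston$medv, p=0.7, list=FALSE)
train2 = Boston[idx, ]
test2 = Boston[-idx, ]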
subfit = regsubsets(medv~., data=train)
b.sum = summary(subfit)
which.min(b.sum$bic)
## [1] 7
plot(b.sum$bic, type="l", xlab="# of Features", ylab="BIC", main="BIC score by Feature Inclusion")

plot(subfit, scale="bic", main="Best Subset Features")

ols = lm(medv~zn+nox+rm+dis+ptratio+b+lstat, data=train) #refit the best subset with lm()
plot(ols$fitted.values, train$medv, xlab="Predicted", ylab="Actual", main="Predicted vs Actual")

pred.subfit = predict(ols, newdata=test)
plot(pred.subfit, test$medv , xlab="Predicted", ylab="Actual", main="Predicted vs Actual")

resid.subfit = test$medv - pred.subfit
mean(resid.subfit^2)
## [1] 21.87516
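#regsubsets objects have no predict() method, which is why the chosen model is
#refit with lm() above; this generic helper (a sketch of the usual idiom) can
#score any subset size straight from the regsubsets fit
predict.regsubsets = function(object, newdata, id, ...) {
  form = as.formula(object$call[[2]]) #formula passed to regsubsets()
  mat = model.matrix(form, newdata)   #design matrix for the new data
  coefi = coef(object, id=id)         #coefficients of the id-variable model
  mat[, names(coefi)] %*% coefi       #linear predictor
}
head(predict(subfit, newdata=test, id=7)) #should reproduce pred.subfit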
#ridge regression
x = as.matrix(train[,1:12]) #glmnet requires a numeric matrix of predictors
y = train[ ,13] #response vector (medv)
ridge = glmnet(x, y, family="gaussian", alpha=0)
print(ridge)
##
## Call: glmnet(x = x, y = y, family = "gaussian", alpha = 0)
##
## Df %Dev Lambda
## [1,] 12 5.343e-36 6854.0000
## [2,] 12 7.948e-03 6245.0000
## [3,] 12 8.712e-03 5690.0000
## [4,] 12 9.549e-03 5185.0000
## [5,] 12 1.047e-02 4724.0000
## [6,] 12 1.147e-02 4304.0000
## [7,] 12 1.256e-02 3922.0000
## [8,] 12 1.376e-02 3574.0000
## [9,] 12 1.507e-02 3256.0000
## [10,] 12 1.651e-02 2967.0000
## [11,] 12 1.807e-02 2703.0000
## [12,] 12 1.978e-02 2463.0000
## [13,] 12 2.164e-02 2244.0000
## [14,] 12 2.368e-02 2045.0000
## [15,] 12 2.590e-02 1863.0000
## [16,] 12 2.831e-02 1698.0000
## [17,] 12 3.094e-02 1547.0000
## [18,] 12 3.380e-02 1409.0000
## [19,] 12 3.691e-02 1284.0000
## [20,] 12 4.028e-02 1170.0000
## [21,] 12 4.395e-02 1066.0000
## [22,] 12 4.791e-02 971.5000
## [23,] 12 5.221e-02 885.2000
## [24,] 12 5.686e-02 806.6000
## [25,] 12 6.187e-02 734.9000
## [26,] 12 6.728e-02 669.6000
## [27,] 12 7.310e-02 610.1000
## [28,] 12 7.936e-02 555.9000
## [29,] 12 8.607e-02 506.5000
## [30,] 12 9.326e-02 461.5000
## [31,] 12 1.009e-01 420.5000
## [32,] 12 1.091e-01 383.2000
## [33,] 12 1.178e-01 349.1000
## [34,] 12 1.271e-01 318.1000
## [35,] 12 1.369e-01 289.9000
## [36,] 12 1.472e-01 264.1000
## [37,] 12 1.581e-01 240.6000
## [38,] 12 1.695e-01 219.3000
## [39,] 12 1.814e-01 199.8000
## [40,] 12 1.938e-01 182.0000
## [41,] 12 2.067e-01 165.9000
## [42,] 12 2.201e-01 151.1000
## [43,] 12 2.338e-01 137.7000
## [44,] 12 2.480e-01 125.5000
## [45,] 12 2.625e-01 114.3000
## [46,] 12 2.773e-01 104.2000
## [47,] 12 2.924e-01 94.9200
## [48,] 12 3.077e-01 86.4800
## [49,] 12 3.231e-01 78.8000
## [50,] 12 3.386e-01 71.8000
## [51,] 12 3.541e-01 65.4200
## [52,] 12 3.697e-01 59.6100
## [53,] 12 3.852e-01 54.3100
## [54,] 12 4.006e-01 49.4900
## [55,] 12 4.159e-01 45.0900
## [56,] 12 4.311e-01 41.0900
## [57,] 12 4.460e-01 37.4400
## [58,] 12 4.607e-01 34.1100
## [59,] 12 4.752e-01 31.0800
## [60,] 12 4.894e-01 28.3200
## [61,] 12 5.033e-01 25.8000
## [62,] 12 5.168e-01 23.5100
## [63,] 12 5.301e-01 21.4200
## [64,] 12 5.429e-01 19.5200
## [65,] 12 5.553e-01 17.7900
## [66,] 12 5.674e-01 16.2100
## [67,] 12 5.789e-01 14.7700
## [68,] 12 5.901e-01 13.4500
## [69,] 12 6.007e-01 12.2600
## [70,] 12 6.109e-01 11.1700
## [71,] 12 6.206e-01 10.1800
## [72,] 12 6.297e-01 9.2730
## [73,] 12 6.384e-01 8.4500
## [74,] 12 6.465e-01 7.6990
## [75,] 12 6.542e-01 7.0150
## [76,] 12 6.613e-01 6.3920
## [77,] 12 6.680e-01 5.8240
## [78,] 12 6.742e-01 5.3070
## [79,] 12 6.799e-01 4.8350
## [80,] 12 6.853e-01 4.4060
## [81,] 12 6.902e-01 4.0140
## [82,] 12 6.947e-01 3.6580
## [83,] 12 6.989e-01 3.3330
## [84,] 12 7.028e-01 3.0370
## [85,] 12 7.063e-01 2.7670
## [86,] 12 7.096e-01 2.5210
## [87,] 12 7.126e-01 2.2970
## [88,] 12 7.154e-01 2.0930
## [89,] 12 7.179e-01 1.9070
## [90,] 12 7.202e-01 1.7380
## [91,] 12 7.223e-01 1.5830
## [92,] 12 7.243e-01 1.4430
## [93,] 12 7.260e-01 1.3140
## [94,] 12 7.277e-01 1.1980
## [95,] 12 7.291e-01 1.0910
## [96,] 12 7.305e-01 0.9944
## [97,] 12 7.317e-01 0.9060
## [98,] 12 7.328e-01 0.8255
## [99,] 12 7.338e-01 0.7522
## [100,] 12 7.347e-01 0.6854
plot(ridge, label=TRUE)

plot(ridge, xvar="lambda", label=TRUE)

ridge.coef = coef(ridge, s=0.1, exact=TRUE, x=x, y=y) #newer glmnet releases require the original x and y when exact=TRUE
ridge.coef
## 13 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 36.076511743
## crim -0.111102539
## zn 0.050887090
## indus 0.005784647
## nox -20.528464797
## rm 4.400431797
## age -0.004088521
## dis -1.700728401
## rad 0.292258832
## tax -0.010788618
## ptratio -1.055776373
## b 0.009837657
## lstat -0.436608612
plot(ridge, xvar="dev", label=TRUE)

newx = as.matrix(test[,1:12]) #test-set predictors as a matrix for predict()
ridge.y = predict(ridge, newx=newx, type="response", s=0.1)
plot(ridge.y, test$medv, xlab="Predicted", ylab="Actual",main="Ridge Regression")

ridge.resid = ridge.y - test$medv
mean(ridge.resid^2)
## [1] 20.49134
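#the s=0.1 above was picked by eye from the coefficient paths; as a sketch,
#cross-validation (mirroring the cv.glmnet section at the end) can pick it
set.seed(123)
ridge.cv = cv.glmnet(x, y, alpha=0) #alpha=0 keeps it ridge
ridge.cv$lambda.min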
#Lasso
lasso = glmnet(x, y, family="gaussian", alpha=1)
print(lasso)
##
## Call: glmnet(x = x, y = y, family = "gaussian", alpha = 1)
##
## Df %Dev Lambda
## [1,] 0 0.0000 6.854000
## [2,] 2 0.1051 6.245000
## [3,] 2 0.1943 5.690000
## [4,] 2 0.2684 5.185000
## [5,] 2 0.3299 4.724000
## [6,] 2 0.3809 4.304000
## [7,] 2 0.4233 3.922000
## [8,] 2 0.4585 3.574000
## [9,] 2 0.4877 3.256000
## [10,] 3 0.5148 2.967000
## [11,] 3 0.5415 2.703000
## [12,] 3 0.5637 2.463000
## [13,] 3 0.5821 2.244000
## [14,] 3 0.5974 2.045000
## [15,] 3 0.6101 1.863000
## [16,] 3 0.6206 1.698000
## [17,] 3 0.6294 1.547000
## [18,] 3 0.6366 1.409000
## [19,] 4 0.6435 1.284000
## [20,] 4 0.6501 1.170000
## [21,] 4 0.6555 1.066000
## [22,] 5 0.6602 0.971500
## [23,] 5 0.6642 0.885200
## [24,] 5 0.6675 0.806600
## [25,] 6 0.6704 0.734900
## [26,] 6 0.6729 0.669600
## [27,] 7 0.6751 0.610100
## [28,] 7 0.6827 0.555900
## [29,] 7 0.6890 0.506500
## [30,] 7 0.6942 0.461500
## [31,] 7 0.6985 0.420500
## [32,] 8 0.7033 0.383200
## [33,] 8 0.7075 0.349100
## [34,] 8 0.7109 0.318100
## [35,] 8 0.7138 0.289900
## [36,] 8 0.7162 0.264100
## [37,] 9 0.7182 0.240600
## [38,] 9 0.7211 0.219300
## [39,] 9 0.7234 0.199800
## [40,] 10 0.7254 0.182000
## [41,] 11 0.7281 0.165900
## [42,] 11 0.7304 0.151100
## [43,] 10 0.7323 0.137700
## [44,] 10 0.7339 0.125500
## [45,] 10 0.7351 0.114300
## [46,] 10 0.7362 0.104200
## [47,] 10 0.7371 0.094920
## [48,] 10 0.7379 0.086480
## [49,] 10 0.7385 0.078800
## [50,] 10 0.7390 0.071800
## [51,] 10 0.7394 0.065420
## [52,] 10 0.7398 0.059610
## [53,] 10 0.7401 0.054310
## [54,] 11 0.7403 0.049490
## [55,] 11 0.7405 0.045090
## [56,] 11 0.7407 0.041090
## [57,] 11 0.7408 0.037440
## [58,] 11 0.7410 0.034110
## [59,] 11 0.7411 0.031080
## [60,] 11 0.7411 0.028320
## [61,] 11 0.7412 0.025800
## [62,] 11 0.7413 0.023510
## [63,] 11 0.7413 0.021420
## [64,] 11 0.7414 0.019520
## [65,] 11 0.7414 0.017790
## [66,] 11 0.7414 0.016210
## [67,] 12 0.7414 0.014770
## [68,] 12 0.7415 0.013450
## [69,] 12 0.7415 0.012260
## [70,] 12 0.7415 0.011170
## [71,] 12 0.7415 0.010180
## [72,] 12 0.7415 0.009273
## [73,] 12 0.7416 0.008450
## [74,] 12 0.7416 0.007699
## [75,] 12 0.7416 0.007015
## [76,] 12 0.7416 0.006392
plot(lasso, xvar="lambda", label=TRUE)

lasso.coef = coef(lasso, s=0.015, exact=TRUE, x=x, y=y) #x and y again required by newer glmnet when exact=TRUE
lasso.coef
## 13 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 3.697616e+01
## crim -1.119973e-01
## zn 5.187351e-02
## indus 3.701671e-04
## nox -2.105488e+01
## rm 4.365174e+00
## age -2.256860e-03
## dis -1.739044e+00
## rad 3.037438e-01
## tax -1.117913e-02
## ptratio -1.063355e+00
## b 9.706682e-03
## lstat -4.427820e-01
lasso.y = predict(lasso, newx=newx, type="response", s=0.015)
plot(lasso.y, test$medv, xlab="Predicted", ylab="Actual", main="LASSO")

lasso.resid = lasso.y - test$medv
mean(lasso.resid^2)
## [1] 21.0103
#Elastic net
grid = expand.grid(.alpha=seq(0,1, by=.2), .lambda=seq(0.00,0.2, by=0.02))
table(grid)
## .lambda
## .alpha 0 0.02 0.04 0.06 0.08 0.1 0.12 0.14 0.16 0.18 0.2
## 0 1 1 1 1 1 1 1 1 1 1 1
## 0.2 1 1 1 1 1 1 1 1 1 1 1
## 0.4 1 1 1 1 1 1 1 1 1 1 1
## 0.6 1 1 1 1 1 1 1 1 1 1 1
## 0.8 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
control = trainControl(method="LOOCV")
enet.train = train(medv~., data=train, method="glmnet", trControl=control, tuneGrid=grid)
enet.train
## glmnet
##
## 363 samples
## 12 predictor
##
## No pre-processing
## Resampling: Leave-One-Out Cross-Validation
## Summary of sample sizes: 362, 362, 362, 362, 362, 362, ...
## Resampling results across tuning parameters:
##
## alpha lambda RMSE Rsquared
## 0.0 0.00 5.101644 0.7122665
## 0.0 0.02 5.101644 0.7122665
## 0.0 0.04 5.101644 0.7122665
## 0.0 0.06 5.101644 0.7122665
## 0.0 0.08 5.101644 0.7122665
## 0.0 0.10 5.101644 0.7122665
## 0.0 0.12 5.101644 0.7122665
## 0.0 0.14 5.101644 0.7122665
## 0.0 0.16 5.101644 0.7122665
## 0.0 0.18 5.101644 0.7122665
## 0.0 0.20 5.101644 0.7122665
## 0.2 0.00 5.071536 0.7152357
## 0.2 0.02 5.071317 0.7152482
## 0.2 0.04 5.069278 0.7154080
## 0.2 0.06 5.065218 0.7158055
## 0.2 0.08 5.064562 0.7158396
## 0.2 0.10 5.066455 0.7156021
## 0.2 0.12 5.068943 0.7153087
## 0.2 0.14 5.072067 0.7149528
## 0.2 0.16 5.075874 0.7145280
## 0.2 0.18 5.079915 0.7140835
## 0.2 0.20 5.083943 0.7136468
## 0.4 0.00 5.071136 0.7152805
## 0.4 0.02 5.070351 0.7153326
## 0.4 0.04 5.064600 0.7158881
## 0.4 0.06 5.065015 0.7157872
## 0.4 0.08 5.067938 0.7154283
## 0.4 0.10 5.072258 0.7149314
## 0.4 0.12 5.077440 0.7143532
## 0.4 0.14 5.082877 0.7137600
## 0.4 0.16 5.088727 0.7131325
## 0.4 0.18 5.095753 0.7123809
## 0.4 0.20 5.103554 0.7115487
## 0.6 0.00 5.071549 0.7152408
## 0.6 0.02 5.068851 0.7154756
## 0.6 0.04 5.064322 0.7158877
## 0.6 0.06 5.067782 0.7154509
## 0.6 0.08 5.073102 0.7148363
## 0.6 0.10 5.078605 0.7142275
## 0.6 0.12 5.086528 0.7133668
## 0.6 0.14 5.096769 0.7122587
## 0.6 0.16 5.108985 0.7109362
## 0.6 0.18 5.122668 0.7094528
## 0.6 0.20 5.137326 0.7078617
## 0.8 0.00 5.072097 0.7151838
## 0.8 0.02 5.065667 0.7158068
## 0.8 0.04 5.065458 0.7157364
## 0.8 0.06 5.070640 0.7151159
## 0.8 0.08 5.077388 0.7143610
## 0.8 0.10 5.088438 0.7131518
## 0.8 0.12 5.102777 0.7115903
## 0.8 0.14 5.120076 0.7097034
## 0.8 0.16 5.140142 0.7075030
## 0.8 0.18 5.162723 0.7050088
## 0.8 0.20 5.186842 0.7023278
## 1.0 0.00 5.072064 0.7151867
## 1.0 0.02 5.063951 0.7159768
## 1.0 0.04 5.067284 0.7155129
## 1.0 0.06 5.073814 0.7147564
## 1.0 0.08 5.086140 0.7133961
## 1.0 0.10 5.103660 0.7114788
## 1.0 0.12 5.125536 0.7090808
## 1.0 0.14 5.152399 0.7061095
## 1.0 0.16 5.182900 0.7027032
## 1.0 0.18 5.209522 0.6997390
## 1.0 0.20 5.224552 0.6981484
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.02.
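#the winning row is also stored on the train object, so the tuned values can
#be pulled out programmatically rather than read off the table (a sketch)
enet.train$bestTune #alpha = 1, lambda = 0.02 per the LOOCV results above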
enet = glmnet(x, y, family="gaussian", alpha=1, lambda=.02) #refit at the tuned values
enet.coef = coef(enet, s=.02, exact=TRUE, x=x, y=y)
enet.coef
## 13 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 36.665957641
## crim -0.110659882
## zn 0.051262134
## indus .
## nox -20.919387998
## rm 4.372997843
## age -0.001889596
## dis -1.721916544
## rad 0.296280440
## tax -0.010859856
## ptratio -1.060014694
## b 0.009657763
## lstat -0.442844577
enet.y = predict(enet, newx=newx, type="response", s=.02)
plot(enet.y, test$medv, xlab="Predicted", ylab="Actual", main="Elastic Net")

enet.resid = enet.y-test$medv
mean(enet.resid^2)
## [1] 20.97042
#cross validation with glmnet
set.seed(123)
lasso.cv = cv.glmnet(x, y) #defaults: alpha=1 (LASSO) and nfolds=10
plot(lasso.cv)

lasso.cv$lambda.min #lambda with the minimum CV error
## [1] 0.02351164
lasso.cv$lambda.1se #largest lambda within one standard error of the minimum
## [1] 0.2898625
coef(lasso.cv, s ="lambda.1se")
## 13 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 20.387146446
## crim -0.042684357
## zn 0.015833165
## indus .
## nox -11.079099176
## rm 4.858171117
## age .
## dis -0.826788579
## rad .
## tax .
## ptratio -0.872498509
## b 0.007227703
## lstat -0.425463297
lasso.y.cv = predict(lasso.cv, newx=newx, type="response", s="lambda.1se")
lasso.cv.resid = lasso.y.cv - test$medv
mean(lasso.cv.resid^2)
## [1] 20.66634
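#recap (a sketch): gather the test-set MSEs computed above for a side-by-side
#comparison; ridge edges out the others on this split
data.frame(model = c("best subsets OLS", "ridge", "LASSO",
                     "elastic net", "LASSO (lambda.1se)"),
           test.MSE = c(mean(resid.subfit^2), mean(ridge.resid^2),
                        mean(lasso.resid^2), mean(enet.resid^2),
                        mean(lasso.cv.resid^2)))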