library(tidyverse) # data manipulation and visualization
library(leaps) # model selection functions
library(ISLR2) # data sets from "An Introduction to Statistical Learning"
library(glmnet) # lasso and elastic-net regularized GLMs
The default model used in the package is the Gaussian linear model, or “least squares”. We load a set of dummy data:
data("QuickStartExample")
x <- QuickStartExample$x
y <- QuickStartExample$y
fit <- glmnet(x,y)
names(fit) # the fit object has many components; we usually don't extract them directly
## [1] "a0" "beta" "df" "dim" "lambda" "dev.ratio"
## [7] "nulldev" "npasses" "jerr" "offset" "call" "nobs"
Let’s visualize the coefficients:
- Each curve corresponds to a variable and traces the path of its coefficient against the \(\ell_1\)-norm of the whole coefficient vector.
- The axis above the plot indicates the number of nonzero coefficients at the current \(\lambda\).
- In the printed summary below, Df is the number of nonzero coefficients.
plot(fit,label=TRUE)
print(fit)
##
## Call: glmnet(x = x, y = y)
##
## Df %Dev Lambda
## 1 0 0.00 1.63100
## 2 2 5.53 1.48600
## 3 2 14.59 1.35400
## 4 2 22.11 1.23400
## 5 2 28.36 1.12400
## 6 2 33.54 1.02400
## 7 4 39.04 0.93320
## 8 5 45.60 0.85030
## 9 5 51.54 0.77470
## 10 6 57.35 0.70590
## 11 6 62.55 0.64320
## 12 6 66.87 0.58610
## 13 6 70.46 0.53400
## 14 6 73.44 0.48660
## 15 7 76.21 0.44330
## 16 7 78.57 0.40400
## 17 7 80.53 0.36810
## 18 7 82.15 0.33540
## 19 7 83.50 0.30560
## 20 7 84.62 0.27840
## 21 7 85.55 0.25370
## 22 7 86.33 0.23120
## 23 8 87.06 0.21060
## 24 8 87.69 0.19190
## 25 8 88.21 0.17490
## 26 8 88.65 0.15930
## 27 8 89.01 0.14520
## 28 8 89.31 0.13230
## 29 8 89.56 0.12050
## 30 8 89.76 0.10980
## 31 9 89.94 0.10010
## 32 9 90.10 0.09117
## 33 9 90.23 0.08307
## 34 9 90.34 0.07569
## 35 10 90.43 0.06897
## 36 11 90.53 0.06284
## 37 11 90.62 0.05726
## 38 12 90.70 0.05217
## 39 15 90.78 0.04754
## 40 16 90.86 0.04331
## 41 16 90.93 0.03947
## 42 16 90.98 0.03596
## 43 17 91.03 0.03277
## 44 17 91.07 0.02985
## 45 18 91.11 0.02720
## 46 18 91.14 0.02479
## 47 19 91.17 0.02258
## 48 19 91.20 0.02058
## 49 19 91.22 0.01875
## 50 19 91.24 0.01708
## 51 19 91.25 0.01557
## 52 19 91.26 0.01418
## 53 19 91.27 0.01292
## 54 19 91.28 0.01178
## 55 19 91.29 0.01073
## 56 19 91.29 0.00978
## 57 19 91.30 0.00891
## 58 19 91.30 0.00812
## 59 19 91.31 0.00739
## 60 19 91.31 0.00674
## 61 19 91.31 0.00614
## 62 20 91.31 0.00559
## 63 20 91.31 0.00510
## 64 20 91.31 0.00464
## 65 20 91.32 0.00423
## 66 20 91.32 0.00386
## 67 20 91.32 0.00351
As you move from left to right on the \(\ell_1\)-norm axis, the \(\ell_1\)-norm (the sum of the absolute values of the coefficients) increases, corresponding to smaller values of \(\lambda\).
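If it is easier to reason directly in terms of \(\lambda\), the same paths can be plotted against \(\log(\lambda)\) via the xvar argument (a small sketch):
plot(fit, xvar = "lambda", label = TRUE) # coefficient paths against log(lambda)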
We can obtain the model coefficients at one or more \(\lambda\)’s within the range of the sequence:
coef(fit,s=0.1)
## 21 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 0.150928072
## V1 1.320597195
## V2 .
## V3 0.675110234
## V4 .
## V5 -0.817411518
## V6 0.521436671
## V7 0.004829335
## V8 0.319415917
## V9 .
## V10 .
## V11 0.142498519
## V12 .
## V13 .
## V14 -1.059978702
## V15 .
## V16 .
## V17 .
## V18 .
## V19 .
## V20 -1.021873704
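By default, coef() interpolates linearly between the stored \(\lambda\) values when s is not on the fitted sequence. If exact coefficients at s = 0.1 are needed, a sketch (assuming x and y are still in the workspace, since the model must be refit):
coef(fit, s = 0.1, exact = TRUE, x = x, y = y) # refit at s = 0.1 rather than interpolate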
Users can also make predictions at specific \(\lambda\)’s with new input data:
set.seed(29)
nx <- matrix(rnorm(5 * 20), 5, 20)
predict(fit, newx = nx,type="response", s = c(0.1, 0.05))
## s1 s2
## [1,] -4.3067990 -4.5979456
## [2,] -4.1244091 -4.3447727
## [3,] -0.1133939 -0.1859237
## [4,] 3.3458748 3.5270269
## [5,] -1.2366422 -1.2772955
For a binary response, glmnet fits penalized logistic regression with family = "binomial". We load the binomial example data:
data(BinomialExample)
x <- BinomialExample$x
y <- BinomialExample$y
fit <- glmnet(x, y, family = "binomial")
plot(fit,label=TRUE)
print(fit)
##
## Call: glmnet(x = x, y = y, family = "binomial")
##
## Df %Dev Lambda
## 1 0 0.00 0.240500
## 2 1 2.90 0.219100
## 3 1 5.34 0.199600
## 4 2 8.86 0.181900
## 5 2 11.95 0.165800
## 6 2 14.59 0.151000
## 7 2 16.88 0.137600
## 8 3 18.95 0.125400
## 9 7 22.38 0.114200
## 10 8 26.26 0.104100
## 11 8 29.73 0.094850
## 12 8 32.77 0.086420
## 13 9 35.58 0.078750
## 14 11 38.98 0.071750
## 15 12 42.23 0.065380
## 16 12 45.29 0.059570
## 17 13 48.09 0.054280
## 18 13 50.63 0.049450
## 19 14 53.00 0.045060
## 20 14 55.19 0.041060
## 21 15 57.33 0.037410
## 22 15 59.43 0.034090
## 23 16 61.36 0.031060
## 24 17 63.15 0.028300
## 25 17 64.85 0.025790
## 26 18 66.42 0.023490
## 27 19 67.98 0.021410
## 28 20 69.44 0.019510
## 29 20 70.80 0.017770
## 30 21 72.10 0.016190
## 31 21 73.33 0.014760
## 32 23 74.52 0.013440
## 33 23 75.65 0.012250
## 34 24 76.72 0.011160
## 35 24 77.77 0.010170
## 36 25 78.77 0.009267
## 37 25 79.73 0.008444
## 38 26 80.66 0.007693
## 39 26 81.57 0.007010
## 40 27 82.48 0.006387
## 41 27 83.39 0.005820
## 42 27 84.30 0.005303
## 43 27 85.21 0.004832
## 44 27 86.12 0.004402
## 45 27 87.05 0.004011
## 46 28 87.96 0.003655
## 47 28 88.87 0.003330
## 48 28 89.76 0.003034
## 49 28 90.61 0.002765
## 50 28 91.41 0.002519
## 51 28 92.16 0.002295
## 52 28 92.86 0.002092
## 53 28 93.50 0.001906
## 54 28 94.08 0.001736
## 55 29 94.61 0.001582
## 56 29 95.10 0.001442
## 57 29 95.54 0.001314
## 58 29 95.95 0.001197
## 59 29 96.31 0.001091
## 60 29 96.64 0.000994
## 61 29 96.94 0.000905
## 62 29 97.22 0.000825
## 63 29 97.47 0.000752
## 64 29 97.69 0.000685
## 65 29 97.90 0.000624
## 66 29 98.09 0.000569
## 67 29 98.26 0.000518
## 68 29 98.41 0.000472
## 69 29 98.55 0.000430
## 70 29 98.68 0.000392
## 71 29 98.80 0.000357
## 72 30 98.91 0.000325
## 73 30 99.00 0.000296
## 74 30 99.09 0.000270
## 75 30 99.17 0.000246
## 76 30 99.25 0.000224
## 77 30 99.31 0.000204
## 78 30 99.37 0.000186
## 79 30 99.43 0.000170
## 80 30 99.48 0.000155
## 81 30 99.52 0.000141
## 82 30 99.57 0.000128
## 83 30 99.61 0.000117
## 84 30 99.64 0.000106
## 85 30 99.67 0.000097
## 86 30 99.70 0.000088
## 87 30 99.73 0.000081
## 88 30 99.75 0.000073
## 89 30 99.77 0.000067
## 90 30 99.79 0.000061
## 91 30 99.81 0.000056
## 92 30 99.83 0.000051
## 93 30 99.84 0.000046
## 94 30 99.86 0.000042
## 95 30 99.87 0.000038
## 96 30 99.88 0.000035
## 97 30 99.89 0.000032
## 98 30 99.90 0.000029
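The %Dev column reports the fraction of (null) deviance explained at each \(\lambda\). As a sketch, the coefficient paths can also be plotted against this quantity:
plot(fit, xvar = "dev", label = TRUE) # paths against the fraction of deviance explained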
Predictions of type "class" correspond to the second level of the factor response. In the following example, we predict the class labels at \(\lambda = 0.05, 0.01\).
predict(fit, newx = x[1:5,], type = "class", s = c(0.05, 0.01))
## s1 s2
## [1,] "0" "0"
## [2,] "1" "1"
## [3,] "1" "1"
## [4,] "0" "0"
## [5,] "1" "1"
In most problems, we don’t have a large separate test set, so we estimate the test error by resampling.
- \(k\)-fold CV: divide the set of observations into \(k\) folds (groups) of approximately equal size; \(k = 5\) or \(10\) is commonly used.

We start more simply, randomly dividing the available set into two parts, a training set and a hold-out (validation) set:
set.seed(3)
data(Auto)
train <- sample(392,196)
lm.fit <- lm(mpg~horsepower,data=Auto,subset = train)
# We now use the predict() function to estimate the response for all 392 observations, and we use the mean() function to calculate the MSE of the 196 observations in the validation set.
attach(Auto)
mean((mpg - predict(lm.fit,Auto))[-train]^2) # estimated test MSE
## [1] 21.99239
lm.fit2 <- lm(mpg~poly(horsepower,2),data=Auto,subset=train)
mean((mpg-predict(lm.fit2,Auto))[-train]^2)
## [1] 17.70254
lm.fit3 <- lm(mpg~poly(horsepower,3),data=Auto,subset=train)
mean((mpg - predict(lm.fit3,Auto))[-train]^2)
## [1] 17.71032
Using a different partition (by changing the seed) yields different error estimates, but these results are consistent with our previous findings.
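As a quick sketch, we can repeat the split with a different, arbitrarily chosen seed; the names train2 and fit.d are introduced here just for illustration:
set.seed(17)                  # any other seed works; chosen arbitrarily
train2 <- sample(392, 196)    # a different random training set
for (d in 1:3) {              # polynomial degrees 1 to 3, as above
  fit.d <- lm(mpg ~ poly(horsepower, d), data = Auto, subset = train2)
  print(mean((mpg - predict(fit.d, Auto))[-train2]^2)) # validation MSE for degree d
}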
library(boot)
glm.fit <- glm(mpg~horsepower,data=Auto)
cv.err <- cv.glm(Auto, glm.fit) # default is LOOCV (K = n); the two delta values should then be nearly identical
names(cv.err)
## [1] "call" "K" "delta" "seed"
cv.err$delta # raw and bias-adjusted LOOCV estimates of the test MSE
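cv.glm() also performs K-fold CV when K is supplied. A minimal sketch comparing polynomial degrees 1 through 5 with 10-fold CV (the loop and object names are illustrative):
set.seed(17)
cv.error.10 <- rep(0, 5)      # storage for the 10-fold CV estimates
for (d in 1:5) {              # polynomial degree in horsepower
  glm.fit.d <- glm(mpg ~ poly(horsepower, d), data = Auto)
  cv.error.10[d] <- cv.glm(Auto, glm.fit.d, K = 10)$delta[1] # raw CV estimate
}
cv.error.10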
Returning to glmnet, cv.glmnet() performs k-fold cross-validation over the \(\lambda\) sequence. Plotting the result shows the cross-validation curve (red dotted line) along with upper and lower standard deviation curves along the \(\lambda\) sequence (error bars).
cvfit <- cv.glmnet(x, y) # nfolds defaults to 10
plot(cvfit)
cvfit$lambda.min # the lambda giving the minimum mean cross-validated error
## [1] 0.0310587
cvfit$lambda.1se # the largest lambda whose cross-validated error is within 1 standard error of the minimum
## [1] 0.05427596
predict(cvfit,newx=x[1:5,],s="lambda.min")
## lambda.min
## [1,] 0.2932459
## [2,] 0.9290067
## [3,] 0.6103900
## [4,] 0.1700787
## [5,] 0.6023053
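The coefficients at either selected \(\lambda\) can be extracted in the same way; for a sparser model, lambda.1se is often preferred (sketch):
coef(cvfit, s = "lambda.1se") # sparse coefficient vector at the 1-SE lambda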