load("/Users/new/Desktop/ecob2000_leture1/NHIS_2014/NHIS_2014.RData")
attach(data_use1)
data_use1$earn_lastyr <- as.factor(data_use1$ERNYR_P)
levels(data_use1$earn_lastyr) <- c("0","$01-$4999","$5000-$9999","$10000-$14999","$15000-$19999","$20000-$24999","$25000-$34999","$35000-$44999","$45000-$54999","$55000-$64999","$65000-$74999","$75000 and over",NA,NA,NA)
##Narrow down the subset between age 21 to 75.
dat_2<-subset(data_use1, ((AGE_P>=21)&(AGE_P<=75)))
model_logit1 <- glm(NOTCOV ~ AGE_P + I(AGE_P^2) + female + Hispanic + Asian + educ_hs + educ_smcoll + educ_as + educ_bach + educ_adv + married + divorc_sep + REGION, family = binomial, data = dat_2)
summary(model_logit1)
##
## Call:
## glm(formula = NOTCOV ~ AGE_P + I(AGE_P^2) + female + Hispanic +
## Asian + educ_hs + educ_smcoll + educ_as + educ_bach + educ_adv +
## married + divorc_sep + REGION, family = binomial, data = dat_2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6401 -0.5943 -0.3776 -0.2001 3.2871
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.807e+00 1.253e-01 -22.411 < 2e-16 ***
## AGE_P 1.273e-01 5.935e-03 21.449 < 2e-16 ***
## I(AGE_P^2) -1.853e-03 6.788e-05 -27.297 < 2e-16 ***
## female -2.339e-01 2.222e-02 -10.525 < 2e-16 ***
## Hispanic 8.968e-01 2.553e-02 35.122 < 2e-16 ***
## Asian 2.233e-01 4.684e-02 4.768 1.86e-06 ***
## educ_hs -4.514e-01 2.988e-02 -15.107 < 2e-16 ***
## educ_smcoll -8.963e-01 3.498e-02 -25.619 < 2e-16 ***
## educ_as -1.028e+00 4.216e-02 -24.375 < 2e-16 ***
## educ_bach -1.676e+00 4.314e-02 -38.865 < 2e-16 ***
## educ_adv -2.266e+00 7.042e-02 -32.185 < 2e-16 ***
## married -5.717e-01 2.587e-02 -22.095 < 2e-16 ***
## divorc_sep -3.214e-02 3.769e-02 -0.853 0.394
## REGIONMidwest 2.168e-01 4.122e-02 5.260 1.44e-07 ***
## REGIONSouth 5.789e-01 3.555e-02 16.282 < 2e-16 ***
## REGIONWest 2.118e-01 3.732e-02 5.674 1.40e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 63569 on 73879 degrees of freedom
## Residual deviance: 53049 on 73864 degrees of freedom
## AIC: 53081
##
## Number of Fisher Scoring iterations: 6
exp(model_logit1$coefficients)
## (Intercept) AGE_P I(AGE_P^2) female Hispanic
## 0.06038342 1.13574922 0.99814885 0.79144828 2.45163612
## Asian educ_hs educ_smcoll educ_as educ_bach
## 1.25020550 0.63671027 0.40809134 0.35782125 0.18702824
## educ_adv married divorc_sep REGIONMidwest REGIONSouth
## 0.10368536 0.56457629 0.96836645 1.24213288 1.78403426
## REGIONWest
## 1.23587261
plot(coef(model_logit1))
Interpretation: 1. The variable “divorc_sep” is statistically insignificant.
2.Now we are analyzing variables that affect people not being covered(NOTCOV). To figure out which subset of people are more likely to have an insurance, we interpret the sign of coefficient reversely. People who are female is more likely to get covered. Educational level and getting insured are positively related. When looking at the marriage status, those get married and divorc_sep are more likely to have an insurance than others.
3.Probability of not getting covered. (exp(model_logit1$coefficients))
REGION: the coefficient of REGIONMidwest is 1.24213288 , (p/(1-p)=1.24213288 ). We got the probability of people in the midwest not geting insured is 55.35%. We got P(REGIONSouth)=64.08%, P( REGIONWest)=55.27%. As a result, we concluded that people in the West are less likely to have an insurance.
model_probit1 <- glm(NOTCOV ~ AGE_P + I(AGE_P^2) + female + Hispanic + Asian + educ_hs + educ_smcoll + educ_as + educ_bach + educ_adv + married + divorc_sep + REGION, family = binomial(link="probit"), data = dat_2)
summary(model_probit1)
##
## Call:
## glm(formula = NOTCOV ~ AGE_P + I(AGE_P^2) + female + Hispanic +
## Asian + educ_hs + educ_smcoll + educ_as + educ_bach + educ_adv +
## married + divorc_sep + REGION, family = binomial(link = "probit"),
## data = dat_2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5620 -0.6070 -0.3836 -0.1784 3.5273
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.565e+00 6.845e-02 -22.866 < 2e-16 ***
## AGE_P 6.786e-02 3.191e-03 21.265 < 2e-16 ***
## I(AGE_P^2) -9.814e-04 3.575e-05 -27.451 < 2e-16 ***
## female -1.245e-01 1.232e-02 -10.105 < 2e-16 ***
## Hispanic 5.143e-01 1.471e-02 34.966 < 2e-16 ***
## Asian 1.292e-01 2.519e-02 5.129 2.91e-07 ***
## educ_hs -2.610e-01 1.746e-02 -14.955 < 2e-16 ***
## educ_smcoll -5.058e-01 1.992e-02 -25.395 < 2e-16 ***
## educ_as -5.785e-01 2.342e-02 -24.699 < 2e-16 ***
## educ_bach -9.038e-01 2.270e-02 -39.819 < 2e-16 ***
## educ_adv -1.180e+00 3.320e-02 -35.559 < 2e-16 ***
## married -3.299e-01 1.443e-02 -22.855 < 2e-16 ***
## divorc_sep -1.993e-02 2.113e-02 -0.943 0.346
## REGIONMidwest 1.060e-01 2.217e-02 4.781 1.75e-06 ***
## REGIONSouth 3.073e-01 1.930e-02 15.920 < 2e-16 ***
## REGIONWest 1.193e-01 2.026e-02 5.886 3.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 63569 on 73879 degrees of freedom
## Residual deviance: 53087 on 73864 degrees of freedom
## AIC: 53119
##
## Number of Fisher Scoring iterations: 6
exp(model_probit1$coefficients)
## (Intercept) AGE_P I(AGE_P^2) female Hispanic
## 0.2090647 1.0702146 0.9990191 0.8829468 1.6724184
## Asian educ_hs educ_smcoll educ_as educ_bach
## 1.1379083 0.7702479 0.6030160 0.5607242 0.4050447
## educ_adv married divorc_sep REGIONMidwest REGIONSouth
## 0.3071353 0.7190060 0.9802626 1.1118070 1.3597487
## REGIONWest
## 1.1266753
Conclusion: the coefficients signs of Logit model and Probit model are the same. Both models demonstrate that “divorc_sep” is insignificant.
#using codes from lab
d_region <- data.frame(model.matrix(~ data_use1$REGION))
d_region_born <- data.frame(model.matrix(~ factor(data_use1$region_born))) # snips any with zero in the subgroup
dat_for_analysis_sub <- data.frame(
data_use1$NOTCOV,
data_use1$AGE_P,
data_use1$female,
data_use1$AfAm,
data_use1$Asian,
data_use1$RaceOther,
data_use1$Hispanic,
data_use1$educ_hs,
data_use1$educ_smcoll,
data_use1$educ_as,
data_use1$educ_bach,
data_use1$educ_adv,
data_use1$married,
data_use1$widowed,
data_use1$divorc_sep,
d_region[,2:4],
d_region_born[,2:12]) # need [] since model.matrix includes intercept term
names(dat_for_analysis_sub) <- c("NOTCOV",
"Age",
"female",
"AfAm",
"Asian",
"RaceOther",
"Hispanic",
"educ_hs",
"educ_smcoll",
"educ_as",
"educ_bach",
"educ_adv",
"married",
"widowed",
"divorc_sep",
"Region.Midwest",
"Region.South",
"Region.West",
"born.Mex.CentAm.Carib",
"born.S.Am",
"born.Eur",
"born.f.USSR",
"born.Africa",
"born.MidE",
"born.India.subc",
"born.Asia",
"born.SE.Asia",
"born.elsewhere",
"born.unknown")
require("standardize")
## Loading required package: standardize
set.seed(654321)
NN <- length(dat_for_analysis_sub$NOTCOV)
# restrict_1 <- as.logical(round(runif(NN,min=0,max=0.6))) # use fraction as training data
restrict_1 <- (runif(NN) < 0.1) # use 10% as training data
summary(restrict_1)
## Mode FALSE TRUE
## logical 100870 11183
dat_train <- subset(dat_for_analysis_sub, restrict_1)
dat_test <- subset(dat_for_analysis_sub, !restrict_1)
sobj <- standardize(NOTCOV ~ Age + female + AfAm + Asian + RaceOther + Hispanic +
educ_hs + educ_smcoll + educ_as + educ_bach + educ_adv +
married + widowed + divorc_sep +
Region.Midwest + Region.South + Region.West +
born.Mex.CentAm.Carib + born.S.Am + born.Eur + born.f.USSR +
born.Africa + born.MidE + born.India.subc + born.Asia +
born.SE.Asia + born.elsewhere + born.unknown, dat_train, family = binomial)
s_dat_test <- predict(sobj, dat_test)
model_logit1 <- glm(sobj$formula, family = binomial, data = sobj$data)
summary(model_logit1)
##
## Call:
## glm(formula = sobj$formula, family = binomial, data = sobj$data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7804 -0.5018 -0.3986 -0.2632 2.9370
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.69782 0.87347 3.089 0.002011 **
## Age -0.19411 0.05107 -3.801 0.000144 ***
## female1 -0.07271 0.03133 -2.321 0.020308 *
## AfAm1 -0.09339 0.04789 -1.950 0.051191 .
## Asian1 -0.13093 0.10570 -1.239 0.215465
## RaceOther1 0.47154 0.08467 5.569 2.56e-08 ***
## Hispanic1 0.13459 0.04606 2.922 0.003475 **
## educ_hs1 0.37334 0.04334 8.615 < 2e-16 ***
## educ_smcoll1 0.23167 0.04953 4.677 2.91e-06 ***
## educ_as1 0.23206 0.06380 3.637 0.000275 ***
## educ_bach1 -0.20639 0.06920 -2.982 0.002861 **
## educ_adv1 -0.58966 0.12690 -4.647 3.38e-06 ***
## married1 -0.18068 0.04531 -3.988 6.67e-05 ***
## widowed1 -0.42442 0.12056 -3.521 0.000431 ***
## divorc_sep1 0.11064 0.06220 1.779 0.075258 .
## Region.Midwest1 0.18917 0.06143 3.080 0.002073 **
## Region.South1 0.43674 0.05330 8.194 2.53e-16 ***
## Region.West1 0.16924 0.05698 2.970 0.002975 **
## born.Mex.CentAm.Carib1 0.94692 0.05253 18.025 < 2e-16 ***
## born.S.Am1 0.70720 0.12982 5.447 5.11e-08 ***
## born.Eur1 0.27642 0.14314 1.931 0.053475 .
## born.f.USSR1 0.14494 0.52524 0.276 0.782591
## born.Africa1 0.57577 0.15597 3.692 0.000223 ***
## born.MidE1 0.67806 0.26081 2.600 0.009327 **
## born.India.subc1 0.48919 0.18127 2.699 0.006963 **
## born.Asia1 0.41842 0.18432 2.270 0.023201 *
## born.SE.Asia1 0.43475 0.15039 2.891 0.003841 **
## born.elsewhere1 0.02772 0.27777 0.100 0.920511
## born.unknown1 0.29552 0.31308 0.944 0.345211
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 8222.7 on 11182 degrees of freedom
## Residual deviance: 7160.6 on 11154 degrees of freedom
## AIC: 7218.6
##
## Number of Fisher Scoring iterations: 6
pred_vals <- predict(model_logit1, s_dat_test, type = "response")
## Warning: contrasts dropped from factor female
## Warning: contrasts dropped from factor AfAm
## Warning: contrasts dropped from factor Asian
## Warning: contrasts dropped from factor RaceOther
## Warning: contrasts dropped from factor Hispanic
## Warning: contrasts dropped from factor educ_hs
## Warning: contrasts dropped from factor educ_smcoll
## Warning: contrasts dropped from factor educ_as
## Warning: contrasts dropped from factor educ_bach
## Warning: contrasts dropped from factor educ_adv
## Warning: contrasts dropped from factor married
## Warning: contrasts dropped from factor widowed
## Warning: contrasts dropped from factor divorc_sep
## Warning: contrasts dropped from factor Region.Midwest
## Warning: contrasts dropped from factor Region.South
## Warning: contrasts dropped from factor Region.West
## Warning: contrasts dropped from factor born.Mex.CentAm.Carib
## Warning: contrasts dropped from factor born.S.Am
## Warning: contrasts dropped from factor born.Eur
## Warning: contrasts dropped from factor born.f.USSR
## Warning: contrasts dropped from factor born.Africa
## Warning: contrasts dropped from factor born.MidE
## Warning: contrasts dropped from factor born.India.subc
## Warning: contrasts dropped from factor born.Asia
## Warning: contrasts dropped from factor born.SE.Asia
## Warning: contrasts dropped from factor born.elsewhere
## Warning: contrasts dropped from factor born.unknown
pred_model_logit1 <- (pred_vals > 0.6)
table(pred = pred_model_logit1, true = dat_test$NOTCOV)
## true
## pred 0 1
## FALSE 88069 12008
## TRUE 379 414
The logit model gives a result of predicted 0, actual 0: 0.375%; predicted 0, actual 1: 0.41%; predicted 1, actual 0: 87.3%; predicted 1, actual 1: 11.9%. The model mis-classifies 87.71% of the data.
require('randomForest')
## Loading required package: randomForest
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
set.seed(54321)
model_randFor <- randomForest(as.factor(NOTCOV) ~ ., data = sobj$data, importance=TRUE, proximity=TRUE)
print(model_randFor)
##
## Call:
## randomForest(formula = as.factor(NOTCOV) ~ ., data = sobj$data, importance = TRUE, proximity = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 5
##
## OOB estimate of error rate: 11.57%
## Confusion matrix:
## 0 1 class.error
## 0 9778 59 0.005997764
## 1 1235 111 0.917533432
round(importance(model_randFor),2)
## 0 1 MeanDecreaseAccuracy MeanDecreaseGini
## Age 27.75 7.19 30.47 224.63
## female 3.05 -3.52 0.57 22.64
## AfAm -5.05 9.69 1.49 15.69
## Asian 13.47 -9.70 10.37 9.56
## RaceOther 1.52 10.07 6.28 14.58
## Hispanic -6.50 19.02 16.71 52.34
## educ_hs 16.68 -9.48 13.62 21.47
## educ_smcoll 15.21 -5.48 13.98 14.54
## educ_as 11.91 -1.71 11.12 11.81
## educ_bach 14.89 11.59 17.21 19.00
## educ_adv 13.73 19.76 21.33 13.58
## married 20.92 -16.08 20.64 27.93
## widowed 2.67 -1.08 2.41 6.98
## divorc_sep 9.47 -2.83 9.07 13.07
## Region.Midwest 1.84 -0.95 1.30 11.19
## Region.South 7.04 8.54 12.13 23.13
## Region.West 7.81 -2.98 7.57 13.72
## born.Mex.CentAm.Carib 3.55 43.51 29.50 123.52
## born.S.Am -7.66 14.29 -1.11 7.84
## born.Eur 4.79 0.40 4.79 6.38
## born.f.USSR -5.68 -0.97 -5.70 0.84
## born.Africa -1.21 9.93 3.99 5.55
## born.MidE 7.20 -0.10 6.85 4.08
## born.India.subc 5.73 -3.47 4.99 4.67
## born.Asia 0.76 1.40 1.29 4.45
## born.SE.Asia 0.90 4.56 2.34 5.84
## born.elsewhere 3.73 -4.00 2.60 3.43
## born.unknown -5.86 -2.86 -6.30 1.94
varImpPlot(model_randFor)
# look at confusion matrix for this too
pred_model1 <- predict(model_randFor, s_dat_test)
table(pred = pred_model1, true = dat_test$NOTCOV)
## true
## pred 0 1
## 0 87844 11175
## 1 604 1247
The Randomforest model gives an accuracy of 88.32%.
require(e1071)
## Loading required package: e1071
# tuned_parameters <- tune.svm(as.factor(NOTCOV) ~ ., data = sobj$data, gamma = 10^(-3:0), cost = 10^(-2:1))
# summary(tuned_parameters)
# figure best parameters and input into next
svm.model <- svm(as.factor(NOTCOV) ~ ., data = sobj$data, cost = 10, gamma = 0.1)
svm.pred <- predict(svm.model, s_dat_test)
table(pred = svm.pred, true = dat_test$NOTCOV)
## true
## pred 0 1
## 0 86565 10432
## 1 1883 1990
The SVM model gives a 87.79% accuracy rate to the model prediction.
# Elastic Net
require(glmnet)
## Loading required package: glmnet
## Loading required package: Matrix
## Loaded glmnet 4.0-2
model1_elasticnet <- glmnet(as.matrix(sobj$data[,-1]),sobj$data$NOTCOV, standardize = T)
# default is alpha = 1, lasso
par(mar=c(4.5,4.5,1,4))
plot(model1_elasticnet)
vnat=coef(model1_elasticnet)
vnat=vnat[-1,ncol(vnat)] # remove the intercept, and get the coefficients at the end of the path
axis(4, at=vnat,line=-.5,label=names(sobj$data[,-1]),las=1,tick=FALSE, cex.axis=0.5)
plot(model1_elasticnet, xvar = "lambda")
plot(model1_elasticnet, xvar = "dev", label = TRUE)
print(model1_elasticnet)
##
## Call: glmnet(x = as.matrix(sobj$data[, -1]), y = sobj$data$NOTCOV, standardize = T)
##
## Df %Dev Lambda
## 1 0 0.00 0.088570
## 2 1 1.26 0.080700
## 3 1 2.30 0.073530
## 4 1 3.17 0.067000
## 5 1 3.89 0.061050
## 6 1 4.49 0.055630
## 7 1 4.98 0.050680
## 8 1 5.40 0.046180
## 9 1 5.74 0.042080
## 10 1 6.02 0.038340
## 11 1 6.26 0.034930
## 12 2 6.48 0.031830
## 13 2 6.70 0.029000
## 14 2 6.87 0.026430
## 15 3 7.06 0.024080
## 16 4 7.35 0.021940
## 17 5 7.63 0.019990
## 18 8 7.94 0.018210
## 19 8 8.27 0.016600
## 20 9 8.55 0.015120
## 21 10 8.80 0.013780
## 22 11 9.04 0.012550
## 23 11 9.26 0.011440
## 24 11 9.45 0.010420
## 25 11 9.60 0.009497
## 26 12 9.73 0.008654
## 27 13 9.86 0.007885
## 28 14 9.98 0.007184
## 29 16 10.09 0.006546
## 30 16 10.20 0.005965
## 31 19 10.31 0.005435
## 32 19 10.42 0.004952
## 33 19 10.50 0.004512
## 34 20 10.57 0.004111
## 35 22 10.64 0.003746
## 36 22 10.70 0.003413
## 37 22 10.75 0.003110
## 38 23 10.79 0.002834
## 39 23 10.83 0.002582
## 40 23 10.86 0.002353
## 41 24 10.89 0.002144
## 42 25 10.92 0.001953
## 43 25 10.94 0.001780
## 44 25 10.96 0.001622
## 45 25 10.98 0.001477
## 46 25 11.00 0.001346
## 47 25 11.01 0.001227
## 48 27 11.02 0.001118
## 49 27 11.03 0.001018
## 50 27 11.04 0.000928
## 51 27 11.04 0.000846
## 52 27 11.05 0.000770
## 53 27 11.06 0.000702
## 54 27 11.06 0.000640
## 55 27 11.06 0.000583
## 56 27 11.07 0.000531
## 57 27 11.07 0.000484
## 58 27 11.07 0.000441
## 59 28 11.07 0.000402
## 60 28 11.07 0.000366
## 61 28 11.08 0.000333
## 62 28 11.08 0.000304
## 63 28 11.08 0.000277
## 64 28 11.08 0.000252
## 65 28 11.08 0.000230
## 66 28 11.08 0.000209
## 67 28 11.08 0.000191
## 68 28 11.08 0.000174
## 69 28 11.08 0.000158
## 70 28 11.08 0.000144
## 71 28 11.08 0.000131
## 72 28 11.08 0.000120
## 73 28 11.08 0.000109
## 74 28 11.08 0.000099
## 75 28 11.08 0.000091
cvmodel1_elasticnet = cv.glmnet(data.matrix(sobj$data[,-1]),data.matrix(sobj$data$NOTCOV))
cvmodel1_elasticnet$lambda.min
## [1] 0.0001908214
log(cvmodel1_elasticnet$lambda.min)
## [1] -8.564172
coef(cvmodel1_elasticnet, s = "lambda.min")
## 29 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 2.677851194
## Age -0.016720206
## female 0.013975079
## AfAm 0.022968596
## Asian 0.019431325
## RaceOther -0.124798672
## Hispanic -0.028472013
## educ_hs -0.082619384
## educ_smcoll -0.049156459
## educ_as -0.049752687
## educ_bach 0.009310563
## educ_adv 0.030100355
## married 0.038664688
## widowed 0.059625120
## divorc_sep -0.017400745
## Region.Midwest -0.025960239
## Region.South -0.074356527
## Region.West -0.019547922
## born.Mex.CentAm.Carib -0.299102960
## born.S.Am -0.169030195
## born.Eur -0.044032389
## born.f.USSR -0.029664624
## born.Africa -0.116167937
## born.MidE -0.121751783
## born.India.subc -0.072892109
## born.Asia -0.062695577
## born.SE.Asia -0.070924454
## born.elsewhere -0.004683961
## born.unknown -0.047359786
pred1_elasnet <- predict(model1_elasticnet, newx = data.matrix(s_dat_test), s = cvmodel1_elasticnet$lambda.min)
pred_model1_elasnet <- (pred1_elasnet < mean(pred1_elasnet))
table(pred = pred_model1_elasnet, true = dat_test$NOTCOV)
## true
## pred 0 1
## FALSE 60246 4367
## TRUE 28202 8055
model2_elasticnet <- glmnet(as.matrix(sobj$data[,-1]),sobj$data$NOTCOV, alpha = 0)
# or try different alpha values to see if you can improve
##LASSO model 1.This model gives a 65.5999% accuracy. 2.% Dev represents the ratio of residual that can be explained by the model. The more it is closer to 1, the better. When Lambda become smaller, more variables are included into the model the higher the %Dev.Since the glmnet () can smartly stop testing when the marginal change of % Dev become smaller and stable. Then we considered the model performs the best. In the model above, when 28 variables are included into the model, lambda approaches 0.0004, we get a kind of satisfying model.
##Change alpha to 0.5(from ridge to elastic net)
pred1_elasnet <- predict(model1_elasticnet, newx = data.matrix(s_dat_test), s = cvmodel1_elasticnet$lambda.min)
pred_model1_elasnet <- (pred1_elasnet < mean(pred1_elasnet))
table(pred = pred_model1_elasnet, true = dat_test$NOTCOV)
## true
## pred 0 1
## FALSE 60246 4367
## TRUE 28202 8055
model2_elasticnet <- glmnet(as.matrix(sobj$data[,-1]),sobj$data$NOTCOV, alpha = 0.5)
When alpha=0.5, the Elastic Net has similar function as LASSO.