Variable selection and regularization matter most when the number of variables is close to, or larger than, the number of data points.
Data manipulation
# load data
crime <- suppressMessages(
readr::read_delim("http://www.statsci.org/data/general/uscrime.txt", delim="\t"))
head(crime)
## # A tibble: 6 × 16
## M So Ed Po1 Po2 LF M.F Pop NW U1 U2 Wealth Ineq
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 15.1 1 9.1 5.8 5.6 0.51 95 33 30.1 0.108 4.1 3940 26.1
## 2 14.3 0 11.3 10.3 9.5 0.583 101. 13 10.2 0.096 3.6 5570 19.4
## 3 14.2 1 8.9 4.5 4.4 0.533 96.9 18 21.9 0.094 3.3 3180 25
## 4 13.6 0 12.1 14.9 14.1 0.577 99.4 157 8 0.102 3.9 6730 16.7
## 5 14.1 0 12.1 10.9 10.1 0.591 98.5 18 3 0.091 2 5780 17.4
## 6 12.1 0 11 11.8 11.5 0.547 96.4 25 4.4 0.084 2.9 6890 12.6
## # ℹ 3 more variables: Prob <dbl>, Time <dbl>, Crime <dbl>
# scale the data, except the So and Crime variables
# So is a binary indicator and Crime is the response, so they are left as-is; the remaining numerical features are centered and scaled (and rounded to 2 decimals).
crime[, c(1, 3:15)] <- lapply(crime[, c(1,3:15)], function(x) round(scale(x), 2))
head(crime)
## # A tibble: 6 × 16
## M[,1] So Ed[,1] Po1[,1] Po2[,1] LF[,1] M.F[,1] Pop[,1] NW[,1] U1[,1] U2[,1]
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.99 1 -1.31 -0.91 -0.87 -1.27 -1.12 -0.1 1.94 0.7 0.83
## 2 0.35 0 0.66 0.61 0.53 0.54 0.98 -0.62 0.01 0.03 0.24
## 3 0.27 1 -1.49 -1.35 -1.3 -0.7 -0.48 -0.49 1.15 -0.08 -0.12
## 4 -0.2 0 1.37 2.15 2.17 0.39 0.37 3.16 -0.21 0.36 0.59
## 5 0.19 0 1.37 0.81 0.74 0.74 0.07 -0.49 -0.69 -0.25 -1.66
## 6 -1.4 0 0.39 1.11 1.24 -0.35 -0.65 -0.31 -0.56 -0.64 -0.59
## # ℹ 5 more variables: Wealth <dbl[,1]>, Ineq <dbl[,1]>, Prob <dbl[,1]>,
## # Time <dbl[,1]>, Crime <dbl>
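Note that scale() returns one-column matrices, which is why the columns display as M[,1], Ed[,1], and so on. If preferred, they can be flattened back to plain numeric vectors; this step is optional and does not change any of the model fits below.
# flatten the one-column matrices produced by scale() into plain numeric vectors (optional)
# crime[, c(1, 3:15)] <- lapply(crime[, c(1, 3:15)], as.numeric)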
# Model with all variables
mod_all <- lm(Crime ~ ., data=crime)
AIC(mod_all)
## [1] 650.1969
summary(mod_all)$r.squared
## [1] 0.8023824
# Intercept only model
mod0 <- lm(Crime ~ 1, data=crime)
AIC(mod0)
## [1] 696.4037
summary(mod0)$r.squared
## [1] 0
Pass the intercept-only model (mod0) to the object argument and the formula of the full model (mod_all) to the scope argument.
library(MASS)
fmod <- stepAIC(object = mod0, direction = "forward",
scope = formula(mod_all), trace=FALSE)
coef(fmod)
## (Intercept) Po1 Ineq Ed M Prob
## 905.06369 341.66723 269.46799 219.39474 132.39880 -86.33974
## U2
## 75.78490
AIC(fmod)
## [1] 640.2218
summary(fmod)$r.squared
## [1] 0.7655889
Prediction
predict(fmod,crime[1:5,])
## 1 2 3 4 5
## 810.9613 1389.2908 382.6799 1894.3630 1268.5887
Forward selection retains six predictors; the AIC keeps decreasing at each step until the search stops.
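The step-by-step AIC trace is stored by stepAIC in the anova component of the returned fit:
# AIC at each step of the forward search
fmod$anova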
Compare to linear regression
(a model fit using only the selected variables)
lm <- lm(Crime ~ Po1 + Ineq + Ed + M + Prob + U2, data=crime)
coefficients(lm)
## (Intercept) Po1 Ineq Ed M Prob
## 905.06369 341.66723 269.46799 219.39474 132.39880 -86.33974
## U2
## 75.78490
predict(lm,crime[1:5,])
## 1 2 3 4 5
## 810.9613 1389.2908 382.6799 1894.3630 1268.5887
Model interaction
# Model interaction
mod_inter <- lm(Crime ~ (Po1+ Ineq+ Ed)^2, data=crime)
itermod <- stepAIC(object = mod0, direction = "forward",
scope = formula(mod_inter), trace=FALSE)
coef(itermod)
## (Intercept) Po1 Ineq Ed Po1:Ineq Po1:Ed
## 979.3639 438.4753 296.9046 110.2526 224.1129 135.2383
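For reference, the ^2 in the formula expands to the three main effects plus all pairwise interactions; this can be checked directly:
# (Po1 + Ineq + Ed)^2 expands to main effects plus all pairwise interaction terms
attr(terms(Crime ~ (Po1 + Ineq + Ed)^2), "term.labels")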
bmod <- stepAIC(mod_all, direction = "backward",
scope = formula(mod0), trace=FALSE)
coef(bmod)
## (Intercept) M Ed Po1 M.F U1
## 905.14017 117.41855 201.06383 305.09942 65.90717 -109.64063
## U2 Ineq Prob
## 158.30781 244.67357 -86.20085
AIC(bmod)
## [1] 639.378
summary(bmod)$r.squared
## [1] 0.7885439
Backward elimination retains eight predictors.
smod <- stepAIC(mod_all, direction = "both", trace=FALSE)
coef(smod)
## (Intercept) M Ed Po1 M.F U1
## 905.14017 117.41855 201.06383 305.09942 65.90717 -109.64063
## U2 Ineq Prob
## 158.30781 244.67357 -86.20085
AIC(smod)
## [1] 639.378
summary(smod)$r.squared
## [1] 0.7885439
Stepwise selection in both directions retains the same eight predictors as backward elimination.
(10-fold CV, repeated 3 times: take the average of all the individual resampling scores, then train the model on the entire initial training set with the chosen hyper-parameter value.)
The main thing to remember here is that we have to keep the test data away from the algorithm and do all the validation only on the training data.
Three scenarios:
1. Collect a completely independent dataset and test your model on it.
2. If you have a sufficiently large dataset, set some samples aside, build the model on the remaining training samples, and then test on the left-out samples.
3. Most commonly, split the dataset into a large training set and a smaller test set (the holdout), perform k-fold CV on the training set, and then validate on the test set (a sketch of this follows).
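A minimal sketch of the third scenario, assuming the crime data from above and the caret package (the 80/20 split proportion, the seed, and the plain lm learner are illustrative):
library(caret)
set.seed(1)
# hold out ~20% of the rows as a test set, tune on the rest with repeated 10-fold CV
idx <- as.vector(createDataPartition(crime$Crime, p = 0.8, list = FALSE))
training <- crime[idx, ]
testing  <- crime[-idx, ]
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
fit  <- train(Crime ~ ., data = training, method = "lm", trControl = ctrl)
# final, external check on the held-out test set
postResample(predict(fit, testing), testing$Crime)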
Caret cv
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
set.seed(100599)
set_train <- trainControl(method="repeatedcv", number=10, repeats=3)
cvmod <- train(Crime ~ ., data=crime, scope = formula(mod0),
method="lmStepAIC", direction="backward", trace=FALSE, trControl=set_train)
coef(cvmod$finalModel)
## (Intercept) M Ed Po1 M.F U1
## 905.14017 117.41855 201.06383 305.09942 65.90717 -109.64063
## U2 Ineq Prob
## 158.30781 244.67357 -86.20085
AIC(cvmod$finalModel)
## [1] 639.378
Eight predictors are selected, matching the backward-elimination model above.
However, stepwise regression may not perform as well when testing the final set of variables on other data (external validation).
\[ \text{RSS} + \lambda \sum_{j=1}^p \beta^2_j \] Use the glmnet library to run ridge regression by setting alpha = 0 and searching over lambda.
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-7
y <- as.matrix(dplyr::select(crime, Crime))
x <- as.matrix(dplyr::select(crime, -Crime))
lgrid <- c(seq(0, 1, by=0.1), seq(2, 9, by=1), seq(10, 100, by=10))
lgrid
## [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 2.0
## [13] 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 20.0 30.0 40.0 50.0
## [25] 60.0 70.0 80.0 90.0 100.0
rmod <- glmnet(x, y, alpha=0, lambda=lgrid, standardize=FALSE)
rmod
##
## Call: glmnet(x = x, y = y, alpha = 0, lambda = lgrid, standardize = FALSE)
##
## Df %Dev Lambda
## 1 15 71.30 100.0
## 2 15 72.02 90.0
## 3 15 72.77 80.0
## 4 15 73.55 70.0
## 5 15 74.37 60.0
## 6 15 75.23 50.0
## 7 15 76.12 40.0
## 8 15 77.05 30.0
## 9 15 78.00 20.0
## 10 15 78.94 10.0
## 11 15 79.04 9.0
## 12 15 79.13 8.0
## 13 15 79.23 7.0
## 14 15 79.34 6.0
## 15 15 79.45 5.0
## 16 15 79.56 4.0
## 17 15 79.70 3.0
## 18 15 79.86 2.0
## 19 15 80.06 1.0
## 20 15 80.08 0.9
## 21 15 80.10 0.8
## 22 15 80.12 0.7
## 23 15 80.15 0.6
## 24 15 80.17 0.5
## 25 15 80.19 0.4
## 26 15 80.20 0.3
## 27 15 80.22 0.2
## 28 15 80.23 0.1
## 29 15 80.24 0.0
head(coefficients(rmod))
## 6 x 29 sparse Matrix of class "dgCMatrix"
## [[ suppressing 29 column names 's0', 's1', 's2' ... ]]
##
## (Intercept) 893.66759 893.10817 892.53788 891.92745 891.28518 890.62987
## M 64.69391 67.20381 69.81939 72.69136 75.87753 79.45993
## So 33.59076 35.23542 36.91299 38.70904 40.59930 42.52867
## Ed 67.08097 71.19265 76.11296 81.90149 88.81715 97.15989
## Po1 121.71739 124.75751 128.25083 132.18386 136.72461 142.05177
## Po2 103.92411 105.53695 106.99559 108.33996 109.45132 110.09939
##
## (Intercept) 890.02108 889.60670 889.80755 892.16799 892.65895 893.21804
## M 83.57168 88.41850 94.32950 101.70617 102.62660 103.55977
## So 44.32223 45.54545 44.96126 38.02894 36.58604 34.94187
## Ed 107.50593 120.66293 137.91918 161.82978 164.91402 167.96082
## Po1 148.73391 157.80022 171.81549 202.72238 208.50262 215.56935
## Po2 109.84822 107.65827 100.66127 75.51799 70.33953 63.59732
##
## (Intercept) 893.89170 894.69373 895.64500 896.77916 898.15385 899.86339
## M 104.50336 105.46737 106.46106 107.47822 108.49545 109.46906
## So 32.96078 30.60186 27.80342 24.46604 20.41923 15.38379
## Ed 171.22793 174.72607 178.49533 182.58790 187.14192 192.43667
## Po1 224.19824 235.03534 248.97365 267.46751 293.67173 333.84309
## Po2 55.21693 44.51089 30.53967 11.74259 -15.18629 -56.80820
##
## (Intercept) 902.08504 902.347924 902.621123 902.905172 903.200601
## M 110.28958 110.353913 110.412816 110.465261 110.511570
## So 8.83355 8.057916 7.251672 6.413203 5.541008
## Ed 199.12802 199.924713 200.759386 201.636561 202.551870
## Po1 403.39833 413.179128 423.822155 435.450999 447.943530
## Po2 -129.26932 -139.480956 -150.592259 -162.731437 -175.780841
##
## (Intercept) 903.508855 903.831434 904.169659 904.526009 904.9022622
## M 110.550077 110.579317 110.598464 110.604762 110.5971400
## So 4.630741 3.677921 2.678647 1.625484 0.5131887
## Ed 203.515918 204.537081 205.618849 206.777721 208.0179313
## Po1 461.563063 476.513507 492.871790 511.044932 531.1246832
## Po2 -190.010262 -205.631338 -222.728775 -241.722023 -262.7136098
##
## (Intercept) 905.3013351
## M 110.5728351
## So -0.6669407
## Ed 209.3547344
## Po1 553.4804102
## Po2 -286.0887385
Here, lambda = 0 gives the best in-sample fit (highest %Dev), as expected: a smaller penalty always fits the training data at least as well.
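This can be seen by plotting the fraction of deviance explained against lambda (a quick sketch using the rmod fit from above):
# in-sample fit always improves as the penalty shrinks, so lambda = 0 maximizes %Dev;
# it says nothing about how well the model generalizes
plot(rmod$lambda, rmod$dev.ratio, type = "b", xlab = "lambda", ylab = "fraction of deviance explained")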
set.seed(100599)
ridge_model <- cv.glmnet(x , y , alpha = 0,nfolds = 10, lambda=lgrid, family="gaussian",
standardize=FALSE )
coefficients(ridge_model)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 893.667591
## M 64.693912
## So 33.590763
## Ed 67.080972
## Po1 121.717393
## Po2 103.924112
## LF 21.490218
## M.F 67.489948
## Pop 12.154418
## NW 45.323501
## U1 -34.947457
## U2 61.297790
## Wealth 31.434072
## Ineq 93.314725
## Prob -70.903143
## Time 7.194695
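Note that coef() on a cv.glmnet object uses s = "lambda.1se" by default, so the table above corresponds to lambda.1se rather than lambda.min:
# the default s for coef() on a cv.glmnet fit is "lambda.1se"
ridge_model$lambda.1se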
Best coefficients (at lambda.min)
coef(ridge_model, s = "lambda.min")
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 890.0210847
## M 83.5716762
## So 44.3222262
## Ed 107.5059311
## Po1 148.7339145
## Po2 109.8482238
## LF 16.2463245
## M.F 69.9618382
## Pop -0.8359302
## NW 38.8316090
## U1 -54.6803908
## U2 89.5700285
## Wealth 39.9556249
## Ineq 152.6322583
## Prob -84.9418672
## Time 2.8485790
Best tuning parameter
ridge_model$lambda.min
## [1] 40
The lambda chosen by cv.glmnet (lambda.min = 40) is close to the value selected by caret's glmnet below (50).
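The cross-validation curve can also be inspected directly:
# cross-validated MSE against log(lambda); the dotted lines mark lambda.min and lambda.1se
plot(ridge_model)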
Compare to linear regression
# lm <- lm(Crime~ . , data=crime )
# coefficients(lm)
#
# predict(lm,crime[1:5,])
predict(ridge_model, newx = x[1:5,], s = "lambda.min")
## lambda.min
## [1,] 719.5819
## [2,] 1315.3488
## [3,] 437.8303
## [4,] 1709.2264
## [5,] 1075.2227
# predict(ridge_model, newx = x[1:5,], s = "lambda.1se")
# predict(ridge_model, newx = x[1:5,] )
set.seed(100599)
set_train <- trainControl(method="cv", number=10 ,returnResamp="all"
)
rmod <- train(Crime~., data = crime, method = 'glmnet',
tuneGrid = expand.grid(alpha=0, lambda=lgrid), trControl = set_train)
rmod
## glmnet
##
## 47 samples
## 15 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 42, 43, 43, 41, 43, 41, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.0 257.2289 0.7029449 217.2414
## 0.1 257.2289 0.7029449 217.2414
## 0.2 257.2289 0.7029449 217.2414
## 0.3 257.2289 0.7029449 217.2414
## 0.4 257.2289 0.7029449 217.2414
## 0.5 257.2289 0.7029449 217.2414
## 0.6 257.2289 0.7029449 217.2414
## 0.7 257.2289 0.7029449 217.2414
## 0.8 257.2289 0.7029449 217.2414
## 0.9 257.2289 0.7029449 217.2414
## 1.0 257.2289 0.7029449 217.2414
## 2.0 257.2289 0.7029449 217.2414
## 3.0 257.2289 0.7029449 217.2414
## 4.0 257.2289 0.7029449 217.2414
## 5.0 257.2289 0.7029449 217.2414
## 6.0 257.2289 0.7029449 217.2414
## 7.0 257.2289 0.7029449 217.2414
## 8.0 257.2289 0.7029449 217.2414
## 9.0 257.2289 0.7029449 217.2414
## 10.0 257.2289 0.7029449 217.2414
## 20.0 257.2289 0.7029449 217.2414
## 30.0 256.9806 0.7043180 217.5083
## 40.0 256.0854 0.7031196 216.7806
## 50.0 255.8030 0.7007576 216.1458
## 60.0 255.8645 0.6978777 215.5843
## 70.0 256.1453 0.6948934 215.0973
## 80.0 256.5369 0.6919723 214.6500
## 90.0 257.0270 0.6891059 214.3080
## 100.0 257.5661 0.6863851 214.3654
##
## Tuning parameter 'alpha' was held constant at a value of 0
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0 and lambda = 50.
# method = "glmnet" here, not "lmStepAIC"
Best coefficients (at the selected lambda)
coef(rmod$finalModel, rmod$bestTune$lambda)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 874.162322
## M 77.500286
## So 90.905740
## Ed 99.746419
## Po1 143.068832
## Po2 110.327142
## LF 23.541896
## M.F 69.456316
## Pop 2.194690
## NW 33.911995
## U1 -45.181541
## U2 80.767577
## Wealth 35.970781
## Ineq 132.686907
## Prob -85.497550
## Time 4.991453
Best tuning parameters
rmod$bestTune
## alpha lambda
## 24 0 50
plot(rmod$finalModel, xvar="lambda", label=TRUE)
# refit the ridge model at the caret-selected lambda
las <- glmnet(x, y, alpha=0, lambda=50, standardize=FALSE)
coef(las)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 890.730136
## M 79.173098
## So 42.235812
## Ed 97.410662
## Po1 142.184940
## Po2 109.770955
## LF 17.702254
## M.F 70.475181
## Pop 2.404241
## NW 41.450035
## U1 -49.955164
## U2 82.705333
## Wealth 36.977663
## Ineq 137.754151
## Prob -82.329828
## Time 3.790268
# predict(las,x)
Essentially, as lambda grows large (here 50 is selected), the ridge penalty shrinks the coefficients toward zero, though never exactly to zero. This reduction in variance comes at the cost of increased bias.
\[ \text{RSS} + \lambda \sum_{j=1}^p |\beta_j|. \]
Some coefficients can be forced to exactly zero when lambda is sufficiently large.
lmod <- glmnet(x, y, alpha=1, lambda=lgrid, standardize=FALSE)
lmod
##
## Call: glmnet(x = x, y = y, alpha = 1, lambda = lgrid, standardize = FALSE)
##
## Df %Dev Lambda
## 1 1 40.28 100.0
## 2 1 41.61 90.0
## 3 1 42.79 80.0
## 4 3 44.40 70.0
## 5 4 48.12 60.0
## 6 5 54.85 50.0
## 7 5 60.68 40.0
## 8 6 65.37 30.0
## 9 8 71.60 20.0
## 10 10 76.76 10.0
## 11 10 77.25 9.0
## 12 11 77.69 8.0
## 13 11 78.15 7.0
## 14 11 78.55 6.0
## 15 11 78.89 5.0
## 16 11 79.16 4.0
## 17 12 79.39 3.0
## 18 13 79.55 2.0
## 19 15 79.80 1.0
## 20 15 79.88 0.9
## 21 15 79.95 0.8
## 22 15 80.01 0.7
## 23 15 80.07 0.6
## 24 15 80.12 0.5
## 25 15 80.16 0.4
## 26 15 80.19 0.3
## 27 15 80.21 0.2
## 28 15 80.23 0.1
## 29 15 80.24 0.0
# glmnet stores the lambda path from largest to smallest, so find the column for lambda = 10
g <- which(rev(lgrid) == 10)
coef(lmod)[, g]
## (Intercept) M So Ed Po1 Po2
## 905.070218 90.059221 0.000000 129.547911 306.323406 0.000000
## LF M.F Pop NW U1 U2
## 0.000000 53.324806 0.000000 12.220954 -35.163512 69.807741
## Wealth Ineq Prob Time
## 4.605232 193.579685 -77.984628 0.000000
Coefficients from cv.glmnet
lasso_model <- cv.glmnet(x, y, alpha = 1)
coefficients(lasso_model)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 905.02218
## M 41.87200
## So .
## Ed .
## Po1 274.52562
## Po2 .
## LF .
## M.F 50.18259
## Pop .
## NW .
## U1 .
## U2 .
## Wealth .
## Ineq 73.44748
## Prob -47.83284
## Time .
Plot MSE
plot(lasso_model)
log(10)  # natural log of lambda = 10, for locating it on the plot's log-lambda axis
## [1] 2.302585
Plot coefficients
plot(coef(lasso_model))
# total 16 variables
Here, at the default s (lambda.1se), the lasso shrinks ten of the fifteen predictors exactly to zero, leaving five in the model (M, Po1, M.F, Ineq, Prob).
The surviving coefficients are regularized (shrunken) estimates, not ordinary least-squares estimates.
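A quick check of how many predictors remain non-zero at the default s:
# count non-zero predictor coefficients at the default s = "lambda.1se" (intercept excluded)
sum(coef(lasso_model)[-1, 1] != 0)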
predict(lasso_model, newx = x[1:5,], s = "lambda.min")
## lambda.min
## [1,] 738.5723
## [2,] 1371.4215
## [3,] 405.3442
## [4,] 1778.4943
## [5,] 1150.2672
#
# predict(lasso_model, newx = x[1:5,], s = "lambda.1se")
# compute the prediction for observation 2 by hand with the default-s coefficients: coef %*% c(1, features)
B <- matrix(coefficients(lasso_model), 1, 16)
A <- matrix(c(1, x[2, ]), 16, 1)
B %*% A
## [,1]
## [1,] 1173.148
Lasso with caret (alpha = 1, searching lambda over the grid)
rmod_lasso <- train(Crime~., data = crime, method = 'glmnet',
tuneGrid = expand.grid(alpha=1, lambda=lgrid), trControl = set_train)
rmod_lasso
## glmnet
##
## 47 samples
## 15 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 42, 43, 42, 44, 43, 41, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.0 276.3308 0.5332602 206.8758
## 0.1 276.1340 0.5334472 206.8759
## 0.2 275.7119 0.5342752 207.0146
## 0.3 275.3985 0.5346291 207.2103
## 0.4 274.9290 0.5351602 207.2740
## 0.5 274.2746 0.5362311 207.1324
## 0.6 273.7096 0.5368691 207.0515
## 0.7 273.2021 0.5371730 207.1742
## 0.8 272.6339 0.5379234 207.4054
## 0.9 272.0800 0.5387381 208.0376
## 1.0 271.5368 0.5397225 208.6470
## 2.0 266.8659 0.5532462 209.4660
## 3.0 263.4551 0.5635557 208.1290
## 4.0 260.3451 0.5730960 206.6475
## 5.0 257.6395 0.5798266 205.4691
## 6.0 255.3381 0.5842742 204.7120
## 7.0 253.2581 0.5875854 203.8114
## 8.0 251.3008 0.5917263 203.0562
## 9.0 249.4835 0.5957328 202.4439
## 10.0 248.2197 0.5989190 202.0392
## 20.0 243.4194 0.6135743 202.2524
## 30.0 246.7950 0.6179306 208.2652
## 40.0 251.9887 0.6272594 211.7687
## 50.0 261.0851 0.6206761 216.0071
## 60.0 272.1425 0.5950684 224.8250
## 70.0 277.3382 0.5825096 230.7380
## 80.0 278.8386 0.5791517 232.3339
## 90.0 279.6290 0.5794788 232.9768
## 100.0 280.8392 0.5809463 233.6135
##
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 20.
Plot the coefficient paths against lambda
plot(rmod_lasso$finalModel, xvar="lambda", label=TRUE)
One drawback: when predictors are strongly correlated with each other, the lasso tends to pick one of them somewhat arbitrarily and eliminate the others.
Cross-validation (for stronger generalization) is how we choose the estimated best model with its optimal hyper-parameter values (e.g. lambda). The same process carries over to different types of algorithms such as ridge, lasso, elastic net, random forests, and boosted trees.
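As a sketch of swapping in a different learner with the same resampling setup (the random forest method and the mtry grid below are illustrative, and assume the randomForest package is installed):
# same trainControl object as before; only the method and its tuning grid change
rf_mod <- train(Crime ~ ., data = crime, method = "rf",
                tuneGrid = expand.grid(mtry = c(3, 5, 7)),
                trControl = set_train)
rf_mod$bestTune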
\[ \text{RSS} + \lambda \left( \alpha \sum_{j=1}^p |\beta_j| + (1-\alpha) \sum_{j=1}^p \beta_j^2 \right). \] Search alpha and lambda using cross-validation.
emod <- train(Crime~., data = crime, method = 'glmnet',
tuneGrid = expand.grid(alpha=seq(0, 1, by=0.1), lambda=lgrid), trControl = set_train)
emod
## glmnet
##
## 47 samples
## 15 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 42, 42, 43, 41, 41, 43, ...
## Resampling results across tuning parameters:
##
## alpha lambda RMSE Rsquared MAE
## 0.0 0.0 267.9867 0.7103081 217.6191
## 0.0 0.1 267.9867 0.7103081 217.6191
## 0.0 0.2 267.9867 0.7103081 217.6191
## 0.0 0.3 267.9867 0.7103081 217.6191
## 0.0 0.4 267.9867 0.7103081 217.6191
## 0.0 0.5 267.9867 0.7103081 217.6191
## 0.0 0.6 267.9867 0.7103081 217.6191
## 0.0 0.7 267.9867 0.7103081 217.6191
## 0.0 0.8 267.9867 0.7103081 217.6191
## 0.0 0.9 267.9867 0.7103081 217.6191
## 0.0 1.0 267.9867 0.7103081 217.6191
## 0.0 2.0 267.9867 0.7103081 217.6191
## 0.0 3.0 267.9867 0.7103081 217.6191
## 0.0 4.0 267.9867 0.7103081 217.6191
## 0.0 5.0 267.9867 0.7103081 217.6191
## 0.0 6.0 267.9867 0.7103081 217.6191
## 0.0 7.0 267.9867 0.7103081 217.6191
## 0.0 8.0 267.9867 0.7103081 217.6191
## 0.0 9.0 267.9867 0.7103081 217.6191
## 0.0 10.0 267.9867 0.7103081 217.6191
## 0.0 20.0 267.9867 0.7103081 217.6191
## 0.0 30.0 267.8207 0.7156791 217.8669
## 0.0 40.0 266.3735 0.7214246 217.1630
## 0.0 50.0 265.4512 0.7241696 217.2528
## 0.0 60.0 264.9578 0.7254418 217.5198
## 0.0 70.0 264.7475 0.7259260 217.7840
## 0.0 80.0 264.7332 0.7259267 218.0579
## 0.0 90.0 264.8753 0.7256124 218.3422
## 0.0 100.0 265.1214 0.7250537 218.8859
## 0.1 0.0 282.2145 0.6381494 224.3573
## 0.1 0.1 282.2145 0.6381494 224.3573
## 0.1 0.2 282.2145 0.6381494 224.3573
## 0.1 0.3 282.1224 0.6391422 224.3197
## 0.1 0.4 282.1348 0.6407200 224.4275
## 0.1 0.5 282.1468 0.6421246 224.6028
## 0.1 0.6 282.1520 0.6434051 224.8223
## 0.1 0.7 282.1432 0.6444584 225.0153
## 0.1 0.8 282.0984 0.6454335 225.1497
## 0.1 0.9 282.0040 0.6464008 225.2248
## 0.1 1.0 281.9073 0.6472505 225.2893
## 0.1 2.0 280.8271 0.6552424 225.5293
## 0.1 3.0 279.5050 0.6633663 224.8137
## 0.1 4.0 278.4694 0.6687272 224.3343
## 0.1 5.0 277.5655 0.6730923 224.0276
## 0.1 6.0 276.8250 0.6775106 223.6016
## 0.1 7.0 275.9645 0.6821456 223.0950
## 0.1 8.0 275.2401 0.6841830 222.7255
## 0.1 9.0 274.5865 0.6855753 222.4779
## 0.1 10.0 273.9281 0.6878141 222.1889
## 0.1 20.0 270.6819 0.7055926 220.6449
## 0.1 30.0 268.5494 0.7161526 219.1607
## 0.1 40.0 267.7081 0.7203113 218.8990
## 0.1 50.0 267.8287 0.7201634 219.4387
## 0.1 60.0 268.3943 0.7183583 220.5547
## 0.1 70.0 269.1727 0.7160210 221.6380
## 0.1 80.0 270.1477 0.7132266 222.6706
## 0.1 90.0 271.2655 0.7104941 223.6433
## 0.1 100.0 272.4833 0.7086027 224.5389
## 0.2 0.0 282.1715 0.6358431 224.1210
## 0.2 0.1 282.1715 0.6358431 224.1210
## 0.2 0.2 282.1376 0.6374559 224.2191
## 0.2 0.3 282.1703 0.6393600 224.4021
## 0.2 0.4 282.2028 0.6410499 224.5303
## 0.2 0.5 282.2311 0.6425895 224.7203
## 0.2 0.6 282.2439 0.6438261 224.9644
## 0.2 0.7 282.1770 0.6449047 225.1145
## 0.2 0.8 282.0708 0.6460316 225.2090
## 0.2 0.9 281.9587 0.6470258 225.2869
## 0.2 1.0 281.8092 0.6479373 225.3197
## 0.2 2.0 279.9909 0.6582548 224.9866
## 0.2 3.0 278.1909 0.6696328 223.8878
## 0.2 4.0 276.8695 0.6765556 223.3703
## 0.2 5.0 275.9494 0.6814504 223.0067
## 0.2 6.0 275.2315 0.6836590 222.6379
## 0.2 7.0 274.5998 0.6845719 222.3209
## 0.2 8.0 273.8714 0.6867008 222.0976
## 0.2 9.0 273.2440 0.6892718 221.8749
## 0.2 10.0 272.7557 0.6931899 221.7684
## 0.2 20.0 271.0862 0.7033115 221.3824
## 0.2 30.0 270.2693 0.7111838 221.4095
## 0.2 40.0 270.6792 0.7119936 222.2011
## 0.2 50.0 271.6117 0.7101775 222.9428
## 0.2 60.0 273.2194 0.7074163 223.9470
## 0.2 70.0 274.5612 0.7050320 224.6504
## 0.2 80.0 276.1277 0.7030020 225.4571
## 0.2 90.0 278.1574 0.7017226 226.6392
## 0.2 100.0 280.0406 0.7035427 227.6860
## 0.3 0.0 282.2111 0.6347904 224.0798
## 0.3 0.1 282.1409 0.6353096 224.0371
## 0.3 0.2 282.1650 0.6375557 224.2621
## 0.3 0.3 282.2258 0.6395780 224.4841
## 0.3 0.4 282.2816 0.6413630 224.6421
## 0.3 0.5 282.3259 0.6429038 224.8473
## 0.3 0.6 282.2749 0.6440762 225.0471
## 0.3 0.7 282.1584 0.6453093 225.1707
## 0.3 0.8 282.0399 0.6464900 225.2715
## 0.3 0.9 281.8700 0.6475334 225.3115
## 0.3 1.0 281.7280 0.6484494 225.3384
## 0.3 2.0 279.1822 0.6621204 224.3808
## 0.3 3.0 276.7985 0.6754202 223.0144
## 0.3 4.0 275.3082 0.6841230 222.4181
## 0.3 5.0 274.4350 0.6864008 222.0307
## 0.3 6.0 273.8678 0.6858433 221.8313
## 0.3 7.0 273.3288 0.6868808 221.6479
## 0.3 8.0 273.0673 0.6893341 221.7953
## 0.3 9.0 272.7577 0.6933337 221.9222
## 0.3 10.0 272.4726 0.6954661 221.9573
## 0.3 20.0 271.6896 0.7018986 222.0729
## 0.3 30.0 272.2899 0.7039463 223.5997
## 0.3 40.0 273.4156 0.7041095 224.7305
## 0.3 50.0 274.4283 0.7036317 225.1282
## 0.3 60.0 276.1833 0.7027524 225.6540
## 0.3 70.0 278.0034 0.7071001 226.4272
## 0.3 80.0 280.0004 0.7127841 227.1358
## 0.3 90.0 281.9071 0.7162638 228.2799
## 0.3 100.0 283.5251 0.7127113 229.1827
## 0.4 0.0 282.2133 0.6341863 224.0631
## 0.4 0.1 282.0922 0.6354579 224.0102
## 0.4 0.2 282.1889 0.6376835 224.3088
## 0.4 0.3 282.2850 0.6397667 224.5678
## 0.4 0.4 282.3678 0.6416413 224.7567
## 0.4 0.5 282.3941 0.6430558 224.9514
## 0.4 0.6 282.2849 0.6442898 225.1203
## 0.4 0.7 282.1773 0.6456601 225.2630
## 0.4 0.8 282.0065 0.6468283 225.3302
## 0.4 0.9 281.8374 0.6479136 225.3622
## 0.4 1.0 281.5767 0.6492087 225.3141
## 0.4 2.0 278.2798 0.6666606 223.7085
## 0.4 3.0 275.3971 0.6817248 222.1967
## 0.4 4.0 273.9149 0.6890896 221.5464
## 0.4 5.0 273.0703 0.6892025 221.1311
## 0.4 6.0 272.7160 0.6886944 221.0829
## 0.4 7.0 272.6513 0.6900556 221.4198
## 0.4 8.0 272.6270 0.6925159 221.8509
## 0.4 9.0 272.6177 0.6933039 222.1393
## 0.4 10.0 272.5153 0.6939565 222.2794
## 0.4 20.0 272.4065 0.6997167 223.4248
## 0.4 30.0 273.5408 0.6963566 224.7493
## 0.4 40.0 274.2136 0.6983301 225.1071
## 0.4 50.0 275.9446 0.7046946 225.6669
## 0.4 60.0 277.7905 0.7170586 226.0794
## 0.4 70.0 279.3552 0.7267813 226.7581
## 0.4 80.0 281.1939 0.7198099 227.8106
## 0.4 90.0 283.3367 0.7009370 228.6101
## 0.4 100.0 286.9290 0.6761310 230.5859
## 0.5 0.0 282.1819 0.6342825 224.0429
## 0.5 0.1 282.0815 0.6355018 224.0118
## 0.5 0.2 282.2152 0.6377713 224.3506
## 0.5 0.3 282.3510 0.6399289 224.6539
## 0.5 0.4 282.4572 0.6418021 224.8755
## 0.5 0.5 282.4075 0.6431488 225.0082
## 0.5 0.6 282.2975 0.6444902 225.1922
## 0.5 0.7 282.1556 0.6458766 225.3162
## 0.5 0.8 282.0012 0.6470005 225.3845
## 0.5 0.9 281.6926 0.6485579 225.3219
## 0.5 1.0 281.3257 0.6500658 225.2216
## 0.5 2.0 277.2843 0.6708940 223.0100
## 0.5 3.0 274.0469 0.6881730 221.4460
## 0.5 4.0 272.7308 0.6906541 220.8041
## 0.5 5.0 272.0349 0.6913020 220.4818
## 0.5 6.0 271.8956 0.6923228 220.6697
## 0.5 7.0 272.0208 0.6939745 221.2184
## 0.5 8.0 272.1519 0.6935425 221.7167
## 0.5 9.0 272.2858 0.6934733 222.0993
## 0.5 10.0 272.4462 0.6935347 222.4538
## 0.5 20.0 273.2216 0.6951982 224.3380
## 0.5 30.0 273.2244 0.6973132 224.5852
## 0.5 40.0 274.7790 0.7066753 225.2829
## 0.5 50.0 276.3396 0.7258486 225.0316
## 0.5 60.0 277.9630 0.7344463 226.0282
## 0.5 70.0 280.2847 0.7168362 226.8101
## 0.5 80.0 284.5511 0.6869677 229.1545
## 0.5 90.0 289.9508 0.6547420 233.0739
## 0.5 100.0 294.5685 0.6246517 238.8836
## 0.6 0.0 282.1431 0.6343766 224.0265
## 0.6 0.1 282.0886 0.6355442 224.0385
## 0.6 0.2 282.2577 0.6378656 224.4162
## 0.6 0.3 282.4295 0.6400801 224.7531
## 0.6 0.4 282.5313 0.6418114 224.9805
## 0.6 0.5 282.4312 0.6431595 225.0811
## 0.6 0.6 282.2942 0.6446206 225.2513
## 0.6 0.7 282.1274 0.6459980 225.3575
## 0.6 0.8 281.8631 0.6474967 225.3431
## 0.6 0.9 281.4653 0.6492499 225.2336
## 0.6 1.0 281.0580 0.6508027 225.1154
## 0.6 2.0 276.2856 0.6752832 222.3581
## 0.6 3.0 273.0574 0.6907393 220.9222
## 0.6 4.0 271.7132 0.6921854 220.2030
## 0.6 5.0 271.2641 0.6921596 220.1132
## 0.6 6.0 271.2592 0.6949398 220.4005
## 0.6 7.0 271.2323 0.6972491 220.8486
## 0.6 8.0 271.4355 0.6964189 221.3622
## 0.6 9.0 271.7356 0.6957327 221.8224
## 0.6 10.0 271.9332 0.6956937 222.1180
## 0.6 20.0 272.9930 0.6911846 224.3147
## 0.6 30.0 272.9654 0.7036591 224.4051
## 0.6 40.0 274.5592 0.7262200 224.0504
## 0.6 50.0 276.2095 0.7409007 225.1183
## 0.6 60.0 279.2107 0.7188902 226.1692
## 0.6 70.0 284.8965 0.6815513 229.3150
## 0.6 80.0 291.1848 0.6438233 234.9170
## 0.6 90.0 296.3457 0.6104644 241.8193
## 0.6 100.0 299.7811 0.5837349 246.8190
## 0.7 0.0 282.0879 0.6345417 223.9842
## 0.7 0.1 282.0762 0.6356119 224.0417
## 0.7 0.2 282.2890 0.6379596 224.4649
## 0.7 0.3 282.5110 0.6402135 224.8515
## 0.7 0.4 282.5628 0.6418125 225.0510
## 0.7 0.5 282.4248 0.6431599 225.1288
## 0.7 0.6 282.2875 0.6447062 225.3079
## 0.7 0.7 282.1092 0.6461021 225.3752
## 0.7 0.8 281.6845 0.6480724 225.2641
## 0.7 0.9 281.2485 0.6498339 225.1460
## 0.7 1.0 280.8039 0.6514819 225.0074
## 0.7 2.0 275.3303 0.6798943 221.8077
## 0.7 3.0 272.2020 0.6919032 220.4328
## 0.7 4.0 270.8384 0.6932404 219.6473
## 0.7 5.0 270.4660 0.6946781 219.7713
## 0.7 6.0 270.3502 0.6978148 219.9802
## 0.7 7.0 270.5003 0.6980292 220.5235
## 0.7 8.0 270.7211 0.6983400 220.9875
## 0.7 9.0 270.8302 0.6995239 221.3082
## 0.7 10.0 270.9102 0.7009279 221.5001
## 0.7 20.0 271.4986 0.6980374 223.3104
## 0.7 30.0 272.3223 0.7190536 223.4102
## 0.7 40.0 274.1656 0.7446365 224.0054
## 0.7 50.0 276.9240 0.7301240 224.9414
## 0.7 60.0 283.1706 0.6873359 228.1967
## 0.7 70.0 291.0570 0.6417600 234.9601
## 0.7 80.0 297.1981 0.6015790 243.1483
## 0.7 90.0 300.5143 0.5747440 248.3586
## 0.7 100.0 301.2913 0.5687831 249.5659
## 0.8 0.0 282.0973 0.6347159 224.0086
## 0.8 0.1 282.1281 0.6356518 224.1027
## 0.8 0.2 282.3807 0.6380462 224.5674
## 0.8 0.3 282.6432 0.6402688 224.9941
## 0.8 0.4 282.5855 0.6417264 225.1223
## 0.8 0.5 282.4274 0.6430902 225.1862
## 0.8 0.6 282.3035 0.6446492 225.3713
## 0.8 0.7 281.9509 0.6465242 225.3209
## 0.8 0.8 281.5044 0.6485400 225.2027
## 0.8 0.9 281.0384 0.6503006 225.0732
## 0.8 1.0 280.4992 0.6522192 224.8614
## 0.8 2.0 274.4354 0.6842914 221.3403
## 0.8 3.0 271.5667 0.6928877 220.0940
## 0.8 4.0 270.2746 0.6937432 219.4377
## 0.8 5.0 269.7822 0.6977966 219.4571
## 0.8 6.0 269.5774 0.6987853 219.6310
## 0.8 7.0 269.7350 0.6989139 220.1391
## 0.8 8.0 269.7719 0.7001315 220.4751
## 0.8 9.0 269.8069 0.7014826 220.6955
## 0.8 10.0 269.9167 0.7028482 220.8461
## 0.8 20.0 268.9844 0.7131807 221.5146
## 0.8 30.0 270.7614 0.7400896 221.6690
## 0.8 40.0 273.6388 0.7483917 223.5784
## 0.8 50.0 279.0800 0.7047302 225.7499
## 0.8 60.0 288.5628 0.6507826 232.2597
## 0.8 70.0 296.4135 0.6017975 242.5245
## 0.8 80.0 299.8963 0.5718992 248.0992
## 0.8 90.0 300.5801 0.5679415 249.2550
## 0.8 100.0 301.1049 0.5772718 249.7694
## 0.9 0.0 282.1169 0.6349852 224.0361
## 0.9 0.1 282.1870 0.6357768 224.1702
## 0.9 0.2 282.3960 0.6387656 224.6021
## 0.9 0.3 282.5990 0.6410238 225.0103
## 0.9 0.4 282.5033 0.6423842 225.1237
## 0.9 0.5 282.3827 0.6438671 225.2027
## 0.9 0.6 282.2830 0.6452661 225.3743
## 0.9 0.7 281.7964 0.6475537 225.2540
## 0.9 0.8 281.2983 0.6495271 225.1220
## 0.9 0.9 280.7482 0.6515044 224.9364
## 0.9 1.0 280.0819 0.6540955 224.5850
## 0.9 2.0 273.7097 0.6880828 220.9935
## 0.9 3.0 270.9559 0.6936249 219.7917
## 0.9 4.0 269.9743 0.6942203 219.4381
## 0.9 5.0 269.4666 0.6980420 219.4327
## 0.9 6.0 269.3323 0.6987100 219.6750
## 0.9 7.0 269.1620 0.7003166 219.8913
## 0.9 8.0 268.9986 0.7022015 220.0335
## 0.9 9.0 268.7435 0.7043960 219.9006
## 0.9 10.0 268.5708 0.7053651 220.0640
## 0.9 20.0 266.4047 0.7287805 219.2322
## 0.9 30.0 268.8879 0.7610655 220.3233
## 0.9 40.0 272.3394 0.7385722 221.9030
## 0.9 50.0 282.2570 0.6766966 227.2315
## 0.9 60.0 292.8818 0.6188706 238.8280
## 0.9 70.0 298.0289 0.5767929 246.4017
## 0.9 80.0 298.9339 0.5685706 248.0023
## 0.9 90.0 299.4043 0.5793482 248.5027
## 0.9 100.0 300.9650 0.5821633 249.3379
## 1.0 0.0 282.1218 0.6351366 224.0638
## 1.0 0.1 282.2365 0.6357921 224.2307
## 1.0 0.2 282.4633 0.6388121 224.6763
## 1.0 0.3 282.7293 0.6407147 225.1157
## 1.0 0.4 282.6099 0.6420099 225.2215
## 1.0 0.5 282.5119 0.6435425 225.3078
## 1.0 0.6 282.2310 0.6454610 225.3668
## 1.0 0.7 281.7450 0.6477398 225.2490
## 1.0 0.8 281.2139 0.6496842 225.1118
## 1.0 0.9 280.5532 0.6521246 224.8188
## 1.0 1.0 279.7518 0.6560431 224.3219
## 1.0 2.0 273.2333 0.6897743 220.7984
## 1.0 3.0 270.6088 0.6939710 219.6254
## 1.0 4.0 269.7394 0.6955182 219.4417
## 1.0 5.0 269.3804 0.6978890 219.5128
## 1.0 6.0 269.2274 0.6988690 219.7504
## 1.0 7.0 268.9401 0.7007383 219.9600
## 1.0 8.0 268.6688 0.7026703 219.8303
## 1.0 9.0 268.1249 0.7048013 219.6412
## 1.0 10.0 268.1023 0.7047159 219.7496
## 1.0 20.0 264.5474 0.7426244 217.4148
## 1.0 30.0 267.2228 0.7694693 219.1212
## 1.0 40.0 273.1833 0.7167680 221.7460
## 1.0 50.0 287.0382 0.6491259 232.1134
## 1.0 60.0 295.8886 0.5932294 243.6584
## 1.0 70.0 298.3317 0.5677060 247.3982
## 1.0 80.0 298.2032 0.5781592 247.5677
## 1.0 90.0 299.6228 0.5823179 248.3463
## 1.0 100.0 302.1193 0.5829069 249.8504
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 20.
Caret selects alpha = 1 and lambda = 20. Below, the model is refit directly with glmnet (alpha = 1, lambda fixed at 6).
enmod <- glmnet(x, y, alpha=1, lambda=6, standardize=FALSE)
coef(enmod)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 905.079515
## M 100.431328
## So .
## Ed 155.209834
## Po1 298.018275
## Po2 .
## LF .
## M.F 53.586868
## Pop -8.510894
## NW 16.008402
## U1 -58.997719
## U2 99.590072
## Wealth 36.407901
## Ineq 229.890906
## Prob -81.810625
## Time .
Recursive feature elimination (RFE)
set.seed(100)
options(warn=-1)
subsets <- c(1:14) # try subset sizes from 1 to 14 features to find the optimal number
ctrl <- rfeControl(functions = rfFuncs, # random forest functions
                   method = "repeatedcv", # 10-fold cross-validation repeated 5 times
repeats = 5,
verbose = FALSE, number=10)
lmProfile <- rfe(x=x, y=y,
sizes = subsets,
rfeControl = ctrl)
lmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 5 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 355.0 0.4138 285.0 146.2 0.3441 125.81
## 2 324.7 0.4370 257.0 122.1 0.3421 102.12
## 3 316.0 0.4978 247.3 109.2 0.3275 90.60
## 4 281.7 0.6169 215.3 114.6 0.3154 92.13
## 5 279.2 0.6124 210.4 107.2 0.3061 87.87
## 6 281.3 0.6075 210.6 112.7 0.3143 91.02
## 7 278.6 0.5839 208.5 107.7 0.3083 92.45
## 8 273.9 0.5993 207.2 105.7 0.3099 90.31
## 9 273.8 0.6160 207.3 109.0 0.3073 93.25
## 10 272.1 0.6129 208.0 106.3 0.3055 89.73
## 11 270.7 0.6225 207.7 105.0 0.3057 88.60
## 12 272.7 0.6174 207.7 106.7 0.3090 90.27
## 13 269.0 0.6181 206.6 103.6 0.3071 87.61
## 14 268.1 0.6299 205.8 102.1 0.3111 86.75 *
## 15 271.4 0.6268 208.4 105.5 0.3121 89.13
##
## The top 5 variables (out of 14):
## Po1, Po2, NW, Prob, Wealth
RFE recommends 14 features for the model.
predictors(lmProfile)
## [1] "Po1" "Po2" "NW" "Prob" "Wealth" "Ed" "LF" "M"
## [9] "Ineq" "Time" "Pop" "So" "U2" "M.F"
Plot performance (RMSE) across subset sizes
plot(lmProfile)
Variable importance
varimp <- varImp(lmProfile)
varimp_data <- data.frame(feature = row.names(varimp)[1:15],
                          importance = varimp[1:15, 1])
dotchart(varimp_data$importance,label=varimp_data$feature, main="Variable Importance")
ggplot(data = varimp_data,
aes(x = reorder(feature, -importance), y = importance, fill = feature)) +
geom_bar(stat="identity") + labs(x = "Features", y = "Variable Importance") +
geom_text(aes(label = round(importance, 2)), vjust=1.6, color="white", size=4) +
theme_bw() + theme(legend.position = "none")
# Post prediction
# postResample(predict(lmProfile, x_test), y_test)
This demonstrates the ability of RFE to find important features and eliminate less relevant ones.
ATA and RFE may produce less desirable results if:
- the sample size is small;
- non-random missingness is present in the response data set;
- there are problematic items (e.g., items with low discrimination or high guessing).
See 2.1: Machine learning workflow.
Available algorithms in caret
paste(names(getModelInfo()), collapse=', ')
## [1] "ada, AdaBag, AdaBoost.M1, adaboost, amdai, ANFIS, avNNet, awnb, awtan, bag, bagEarth, bagEarthGCV, bagFDA, bagFDAGCV, bam, bartMachine, bayesglm, binda, blackboost, blasso, blassoAveraged, bridge, brnn, BstLm, bstSm, bstTree, C5.0, C5.0Cost, C5.0Rules, C5.0Tree, cforest, chaid, CSimca, ctree, ctree2, cubist, dda, deepboost, DENFIS, dnn, dwdLinear, dwdPoly, dwdRadial, earth, elm, enet, evtree, extraTrees, fda, FH.GBML, FIR.DM, foba, FRBCS.CHI, FRBCS.W, FS.HGD, gam, gamboost, gamLoess, gamSpline, gaussprLinear, gaussprPoly, gaussprRadial, gbm_h2o, gbm, gcvEarth, GFS.FR.MOGUL, GFS.LT.RS, GFS.THRIFT, glm.nb, glm, glmboost, glmnet_h2o, glmnet, glmStepAIC, gpls, hda, hdda, hdrda, HYFIS, icr, J48, JRip, kernelpls, kknn, knn, krlsPoly, krlsRadial, lars, lars2, lasso, lda, lda2, leapBackward, leapForward, leapSeq, Linda, lm, lmStepAIC, LMT, loclda, logicBag, LogitBoost, logreg, lssvmLinear, lssvmPoly, lssvmRadial, lvq, M5, M5Rules, manb, mda, Mlda, mlp, mlpKerasDecay, mlpKerasDecayCost, mlpKerasDropout, mlpKerasDropoutCost, mlpML, mlpSGD, mlpWeightDecay, mlpWeightDecayML, monmlp, msaenet, multinom, mxnet, mxnetAdam, naive_bayes, nb, nbDiscrete, nbSearch, neuralnet, nnet, nnls, nodeHarvest, null, OneR, ordinalNet, ordinalRF, ORFlog, ORFpls, ORFridge, ORFsvm, ownn, pam, parRF, PART, partDSA, pcaNNet, pcr, pda, pda2, penalized, PenalizedLDA, plr, pls, plsRglm, polr, ppr, pre, PRIM, protoclass, qda, QdaCov, qrf, qrnn, randomGLM, ranger, rbf, rbfDDA, Rborist, rda, regLogistic, relaxo, rf, rFerns, RFlda, rfRules, ridge, rlda, rlm, rmda, rocc, rotationForest, rotationForestCp, rpart, rpart1SE, rpart2, rpartCost, rpartScore, rqlasso, rqnc, RRF, RRFglobal, rrlda, RSimca, rvmLinear, rvmPoly, rvmRadial, SBC, sda, sdwd, simpls, SLAVE, slda, smda, snn, sparseLDA, spikeslab, spls, stepLDA, stepQDA, superpc, svmBoundrangeString, svmExpoString, svmLinear, svmLinear2, svmLinear3, svmLinearWeights, svmLinearWeights2, svmPoly, svmRadial, svmRadialCost, svmRadialSigma, svmRadialWeights, svmSpectrumString, tan, tanSearch, treebag, vbmpRadial, vglmAdjCat, vglmContRatio, vglmCumulative, widekernelpls, WM, wsrf, xgbDART, xgbLinear, xgbTree, xyf"
set.seed(100)
options(warn=-1)
subsets <- c(1:14) # try subset sizes from 1 to 14 features to find the optimal number
ctrl <- rfeControl(functions = lmFuncs, # linear model functions
                   method = "repeatedcv", # 10-fold cross-validation repeated 5 times
repeats = 5,
verbose = FALSE, number=10)
lmProfile <- rfe(x=x, y=y,
sizes = subsets,
rfeControl = ctrl)
lmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 5 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 278.6 0.5873 227.9 105.32 0.3291 92.37
## 2 278.3 0.5704 227.5 104.63 0.3464 90.66
## 3 258.1 0.5788 210.3 100.59 0.3482 83.59
## 4 236.7 0.6251 194.2 111.30 0.3201 94.65 *
## 5 241.7 0.6296 202.8 105.32 0.3129 89.74
## 6 246.3 0.6179 203.8 99.47 0.3235 85.47
## 7 250.0 0.6096 207.9 99.28 0.3037 86.60
## 8 247.0 0.6357 206.0 98.03 0.2959 83.29
## 9 241.7 0.6222 200.3 97.63 0.3049 81.10
## 10 243.2 0.6431 203.0 100.97 0.2917 85.88
## 11 249.8 0.6331 207.9 102.79 0.2962 88.87
## 12 255.3 0.6027 211.7 97.89 0.3019 84.03
## 13 258.1 0.6088 213.8 98.64 0.3076 84.67
## 14 257.9 0.6093 213.6 98.90 0.2985 85.20
## 15 256.5 0.6110 213.0 99.24 0.2981 85.30
##
## The top 4 variables (out of 4):
## Po1, Po2, Ineq, Ed
With linear-model functions (lmFuncs), only four features are selected: Po1, Po2, Ineq, and Ed.
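As with the random-forest run, performance across subset sizes can be plotted:
# RMSE across subset sizes for the lmFuncs run
plot(lmProfile)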