Variable selection and regularization matter most when the number of variables is close to, or larger than, the number of data points.
Data manipulation
# load data
crime <- suppressMessages(
readr::read_delim("http://www.statsci.org/data/general/uscrime.txt", delim="\t"))
head(crime)
## # A tibble: 6 × 16
## M So Ed Po1 Po2 LF M.F Pop NW U1 U2 Wealth Ineq
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 15.1 1 9.1 5.8 5.6 0.51 95 33 30.1 0.108 4.1 3940 26.1
## 2 14.3 0 11.3 10.3 9.5 0.583 101. 13 10.2 0.096 3.6 5570 19.4
## 3 14.2 1 8.9 4.5 4.4 0.533 96.9 18 21.9 0.094 3.3 3180 25
## 4 13.6 0 12.1 14.9 14.1 0.577 99.4 157 8 0.102 3.9 6730 16.7
## 5 14.1 0 12.1 10.9 10.1 0.591 98.5 18 3 0.091 2 5780 17.4
## 6 12.1 0 11 11.8 11.5 0.547 96.4 25 4.4 0.084 2.9 6890 12.6
## # ℹ 3 more variables: Prob <dbl>, Time <dbl>, Crime <dbl>
# scale the data, except the So and Crime variables
# So is a binary indicator and Crime is the response, so they are left as-is; the remaining numerical features are centered and scaled (and rounded to 2 decimals).
crime[, c(1, 3:15)] <- lapply(crime[, c(1,3:15)], function(x) round(scale(x), 2))
head(crime)
## # A tibble: 6 × 16
## M[,1] So Ed[,1] Po1[,1] Po2[,1] LF[,1] M.F[,1] Pop[,1] NW[,1] U1[,1] U2[,1]
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.99 1 -1.31 -0.91 -0.87 -1.27 -1.12 -0.1 1.94 0.7 0.83
## 2 0.35 0 0.66 0.61 0.53 0.54 0.98 -0.62 0.01 0.03 0.24
## 3 0.27 1 -1.49 -1.35 -1.3 -0.7 -0.48 -0.49 1.15 -0.08 -0.12
## 4 -0.2 0 1.37 2.15 2.17 0.39 0.37 3.16 -0.21 0.36 0.59
## 5 0.19 0 1.37 0.81 0.74 0.74 0.07 -0.49 -0.69 -0.25 -1.66
## 6 -1.4 0 0.39 1.11 1.24 -0.35 -0.65 -0.31 -0.56 -0.64 -0.59
## # ℹ 5 more variables: Wealth <dbl[,1]>, Ineq <dbl[,1]>, Prob <dbl[,1]>,
## # Time <dbl[,1]>, Crime <dbl>
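Note that scale() returns one-column matrices, which is why the columns display as M[,1], Ed[,1], and so on. If preferred, they can be flattened back to plain numeric vectors; this step is optional and does not change any of the model fits below.
# flatten the one-column matrices produced by scale() into plain numeric vectors (optional)
# crime[, c(1, 3:15)] <- lapply(crime[, c(1, 3:15)], as.numeric)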
# Model with all variables
mod_all <- lm(Crime ~ ., data=crime)
AIC(mod_all)
## [1] 650.1969
summary(mod_all)$r.squared
## [1] 0.8023824
# Intercept only model
mod0 <- lm(Crime ~ 1, data=crime)
AIC(mod0)
## [1] 696.4037
summary(mod0)$r.squared
## [1] 0
Pass the intercept-only model (mod0) to the object argument and the formula of the full model (mod_all) to the scope argument.
library(MASS)
fmod <- stepAIC(object = mod0, direction = "forward",
scope = formula(mod_all), trace=FALSE)
coef(fmod)
## (Intercept) Po1 Ineq Ed M Prob
## 905.06369 341.66723 269.46799 219.39474 132.39880 -86.33974
## U2
## 75.78490
AIC(fmod)
## [1] 640.2218
summary(fmod)$r.squared
## [1] 0.7655889
Prediction
predict(fmod,crime[1:5,])
## 1 2 3 4 5
## 810.9613 1389.2908 382.6799 1894.3630 1268.5887
Forward selection retains six predictors; the AIC keeps decreasing at each step until the search stops.
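The step-by-step AIC trace is stored by stepAIC in the anova component of the returned fit:
# AIC at each step of the forward search
fmod$anova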
Compare to linear regression
(a model fit using only the selected variables)
lm <- lm(Crime ~ Po1 + Ineq + Ed + M + Prob + U2, data=crime)
coefficients(lm)
## (Intercept) Po1 Ineq Ed M Prob
## 905.06369 341.66723 269.46799 219.39474 132.39880 -86.33974
## U2
## 75.78490
predict(lm,crime[1:5,])
## 1 2 3 4 5
## 810.9613 1389.2908 382.6799 1894.3630 1268.5887
Model interaction
# Model interaction
mod_inter <- lm(Crime ~ (Po1+ Ineq+ Ed)^2, data=crime)
itermod <- stepAIC(object = mod0, direction = "forward",
scope = formula(mod_inter), trace=FALSE)
coef(itermod)
## (Intercept) Po1 Ineq Ed Po1:Ineq Po1:Ed
## 979.3639 438.4753 296.9046 110.2526 224.1129 135.2383
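For reference, the ^2 in the formula expands to the three main effects plus all pairwise interactions; this can be checked directly:
# (Po1 + Ineq + Ed)^2 expands to main effects plus all pairwise interaction terms
attr(terms(Crime ~ (Po1 + Ineq + Ed)^2), "term.labels")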
bmod <- stepAIC(mod_all, direction = "backward",
scope = formula(mod0), trace=FALSE)
coef(bmod)
## (Intercept) M Ed Po1 M.F U1
## 905.14017 117.41855 201.06383 305.09942 65.90717 -109.64063
## U2 Ineq Prob
## 158.30781 244.67357 -86.20085
AIC(bmod)
## [1] 639.378
summary(bmod)$r.squared
## [1] 0.7885439
Backward elimination retains eight predictors.
smod <- stepAIC(mod_all, direction = "both", trace=FALSE)
coef(smod)
## (Intercept) M Ed Po1 M.F U1
## 905.14017 117.41855 201.06383 305.09942 65.90717 -109.64063
## U2 Ineq Prob
## 158.30781 244.67357 -86.20085
AIC(smod)
## [1] 639.378
summary(smod)$r.squared
## [1] 0.7885439
Stepwise selection in both directions retains the same eight predictors as backward elimination.
(10-fold CV, repeated 3 times: take the average of all the individual resampling scores, then train the model on the entire initial training set with the chosen hyper-parameter value.)
The main thing to remember here is that we have to keep the test data away from the algorithm and do all the validation only on the training data.
Three scenarios:
1. Collect a completely independent dataset and test your model on it.
2. If you have a sufficiently large dataset, set some samples aside, build the model on the remaining training samples, and then test on the left-out samples.
3. Most commonly, split the dataset into a large training set and a smaller test set (the holdout), perform k-fold CV on the training set, and then validate on the test set (a sketch of this follows).
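A minimal sketch of the third scenario, assuming the crime data from above and the caret package (the 80/20 split proportion, the seed, and the plain lm learner are illustrative):
library(caret)
set.seed(1)
# hold out ~20% of the rows as a test set, tune on the rest with repeated 10-fold CV
idx <- as.vector(createDataPartition(crime$Crime, p = 0.8, list = FALSE))
training <- crime[idx, ]
testing  <- crime[-idx, ]
ctrl <- trainControl(method = "repeatedcv", number = 10, repeats = 3)
fit  <- train(Crime ~ ., data = training, method = "lm", trControl = ctrl)
# final, external check on the held-out test set
postResample(predict(fit, testing), testing$Crime)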
Caret cv
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
set.seed(100599)
set_train <- trainControl(method="repeatedcv", number=10, repeats=3)
cvmod <- train(Crime ~ ., data=crime, scope = formula(mod0),
method="lmStepAIC", direction="backward", trace=FALSE, trControl=set_train)
coef(cvmod$finalModel)
## (Intercept) M Ed Po1 M.F U1
## 905.14017 117.41855 201.06383 305.09942 65.90717 -109.64063
## U2 Ineq Prob
## 158.30781 244.67357 -86.20085
AIC(cvmod$finalModel)
## [1] 639.378
Eight predictors are selected, matching the backward-elimination model above.
However, stepwise regression may not perform as well when testing the final set of variables on other data (external validation).
\[ \text{RSS} + \lambda \sum_{j=1}^p \beta^2_j \] Use the glmnet library to run ridge regression by setting alpha = 0 and searching over lambda.
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-7
y <- as.matrix(dplyr::select(crime, Crime))
x <- as.matrix(dplyr::select(crime, -Crime))
lgrid <- c(seq(0, 1, by=0.1), seq(2, 9, by=1), seq(10, 100, by=10))
lgrid
## [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0 2.0
## [13] 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 20.0 30.0 40.0 50.0
## [25] 60.0 70.0 80.0 90.0 100.0
rmod <- glmnet(x, y, alpha=0, lambda=lgrid, standardize=FALSE)
rmod
##
## Call: glmnet(x = x, y = y, alpha = 0, lambda = lgrid, standardize = FALSE)
##
## Df %Dev Lambda
## 1 15 71.30 100.0
## 2 15 72.02 90.0
## 3 15 72.77 80.0
## 4 15 73.55 70.0
## 5 15 74.37 60.0
## 6 15 75.23 50.0
## 7 15 76.12 40.0
## 8 15 77.05 30.0
## 9 15 78.00 20.0
## 10 15 78.94 10.0
## 11 15 79.04 9.0
## 12 15 79.13 8.0
## 13 15 79.23 7.0
## 14 15 79.34 6.0
## 15 15 79.45 5.0
## 16 15 79.56 4.0
## 17 15 79.70 3.0
## 18 15 79.86 2.0
## 19 15 80.06 1.0
## 20 15 80.08 0.9
## 21 15 80.10 0.8
## 22 15 80.12 0.7
## 23 15 80.15 0.6
## 24 15 80.17 0.5
## 25 15 80.19 0.4
## 26 15 80.20 0.3
## 27 15 80.22 0.2
## 28 15 80.23 0.1
## 29 15 80.24 0.0
head(coefficients(rmod))
## 6 x 29 sparse Matrix of class "dgCMatrix"
## [[ suppressing 29 column names 's0', 's1', 's2' ... ]]
##
## (Intercept) 893.66759 893.10817 892.53788 891.92745 891.28518 890.62987
## M 64.69391 67.20381 69.81939 72.69136 75.87753 79.45993
## So 33.59076 35.23542 36.91299 38.70904 40.59930 42.52867
## Ed 67.08097 71.19265 76.11296 81.90149 88.81715 97.15989
## Po1 121.71739 124.75751 128.25083 132.18386 136.72461 142.05177
## Po2 103.92411 105.53695 106.99559 108.33996 109.45132 110.09939
##
## (Intercept) 890.02108 889.60670 889.80755 892.16799 892.65895 893.21804
## M 83.57168 88.41850 94.32950 101.70617 102.62660 103.55977
## So 44.32223 45.54545 44.96126 38.02894 36.58604 34.94187
## Ed 107.50593 120.66293 137.91918 161.82978 164.91402 167.96082
## Po1 148.73391 157.80022 171.81549 202.72238 208.50262 215.56935
## Po2 109.84822 107.65827 100.66127 75.51799 70.33953 63.59732
##
## (Intercept) 893.89170 894.69373 895.64500 896.77916 898.15385 899.86339
## M 104.50336 105.46737 106.46106 107.47822 108.49545 109.46906
## So 32.96078 30.60186 27.80342 24.46604 20.41923 15.38379
## Ed 171.22793 174.72607 178.49533 182.58790 187.14192 192.43667
## Po1 224.19824 235.03534 248.97365 267.46751 293.67173 333.84309
## Po2 55.21693 44.51089 30.53967 11.74259 -15.18629 -56.80820
##
## (Intercept) 902.08504 902.347924 902.621123 902.905172 903.200601
## M 110.28958 110.353913 110.412816 110.465261 110.511570
## So 8.83355 8.057916 7.251672 6.413203 5.541008
## Ed 199.12802 199.924713 200.759386 201.636561 202.551870
## Po1 403.39833 413.179128 423.822155 435.450999 447.943530
## Po2 -129.26932 -139.480956 -150.592259 -162.731437 -175.780841
##
## (Intercept) 903.508855 903.831434 904.169659 904.526009 904.9022622
## M 110.550077 110.579317 110.598464 110.604762 110.5971400
## So 4.630741 3.677921 2.678647 1.625484 0.5131887
## Ed 203.515918 204.537081 205.618849 206.777721 208.0179313
## Po1 461.563063 476.513507 492.871790 511.044932 531.1246832
## Po2 -190.010262 -205.631338 -222.728775 -241.722023 -262.7136098
##
## (Intercept) 905.3013351
## M 110.5728351
## So -0.6669407
## Ed 209.3547344
## Po1 553.4804102
## Po2 -286.0887385
Here, lambda = 0 gives the best in-sample fit (highest %Dev), as expected: a smaller penalty always fits the training data at least as well.
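This can be seen by plotting the fraction of deviance explained against lambda (a quick sketch using the rmod fit from above):
# in-sample fit always improves as the penalty shrinks, so lambda = 0 maximizes %Dev;
# it says nothing about how well the model generalizes
plot(rmod$lambda, rmod$dev.ratio, type = "b", xlab = "lambda", ylab = "fraction of deviance explained")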
set.seed(100599)
ridge_model <- cv.glmnet(x , y , alpha = 0,nfolds = 10, lambda=lgrid, family="gaussian",
standardize=FALSE )
coefficients(ridge_model)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 893.667591
## M 64.693912
## So 33.590763
## Ed 67.080972
## Po1 121.717393
## Po2 103.924112
## LF 21.490218
## M.F 67.489948
## Pop 12.154418
## NW 45.323501
## U1 -34.947457
## U2 61.297790
## Wealth 31.434072
## Ineq 93.314725
## Prob -70.903143
## Time 7.194695
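Note that coef() on a cv.glmnet object uses s = "lambda.1se" by default, so the table above corresponds to lambda.1se rather than lambda.min:
# the default s for coef() on a cv.glmnet fit is "lambda.1se"
ridge_model$lambda.1se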
Best coefficients (at lambda.min)
coef(ridge_model, s = "lambda.min")
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 890.0210847
## M 83.5716762
## So 44.3222262
## Ed 107.5059311
## Po1 148.7339145
## Po2 109.8482238
## LF 16.2463245
## M.F 69.9618382
## Pop -0.8359302
## NW 38.8316090
## U1 -54.6803908
## U2 89.5700285
## Wealth 39.9556249
## Ineq 152.6322583
## Prob -84.9418672
## Time 2.8485790
Best tuning parameter
ridge_model$lambda.min
## [1] 40
The lambda chosen by cv.glmnet (lambda.min = 40) is close to the value selected by caret's glmnet below (50).
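The cross-validation curve can also be inspected directly:
# cross-validated MSE against log(lambda); the dotted lines mark lambda.min and lambda.1se
plot(ridge_model)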
Compare to linear regression
# lm <- lm(Crime~ . , data=crime )
# coefficients(lm)
#
# predict(lm,crime[1:5,])
predict(ridge_model, newx = x[1:5,], s = "lambda.min")
## lambda.min
## [1,] 719.5819
## [2,] 1315.3488
## [3,] 437.8303
## [4,] 1709.2264
## [5,] 1075.2227
# predict(ridge_model, newx = x[1:5,], s = "lambda.1se")
# predict(ridge_model, newx = x[1:5,] )
set.seed(100599)
set_train <- trainControl(method="cv", number=10 ,returnResamp="all"
)
rmod <- train(Crime~., data = crime, method = 'glmnet',
tuneGrid = expand.grid(alpha=0, lambda=lgrid), trControl = set_train)
rmod
## glmnet
##
## 47 samples
## 15 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 42, 43, 43, 41, 43, 41, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.0 257.2289 0.7029449 217.2414
## 0.1 257.2289 0.7029449 217.2414
## 0.2 257.2289 0.7029449 217.2414
## 0.3 257.2289 0.7029449 217.2414
## 0.4 257.2289 0.7029449 217.2414
## 0.5 257.2289 0.7029449 217.2414
## 0.6 257.2289 0.7029449 217.2414
## 0.7 257.2289 0.7029449 217.2414
## 0.8 257.2289 0.7029449 217.2414
## 0.9 257.2289 0.7029449 217.2414
## 1.0 257.2289 0.7029449 217.2414
## 2.0 257.2289 0.7029449 217.2414
## 3.0 257.2289 0.7029449 217.2414
## 4.0 257.2289 0.7029449 217.2414
## 5.0 257.2289 0.7029449 217.2414
## 6.0 257.2289 0.7029449 217.2414
## 7.0 257.2289 0.7029449 217.2414
## 8.0 257.2289 0.7029449 217.2414
## 9.0 257.2289 0.7029449 217.2414
## 10.0 257.2289 0.7029449 217.2414
## 20.0 257.2289 0.7029449 217.2414
## 30.0 256.9806 0.7043180 217.5083
## 40.0 256.0854 0.7031196 216.7806
## 50.0 255.8030 0.7007576 216.1458
## 60.0 255.8645 0.6978777 215.5843
## 70.0 256.1453 0.6948934 215.0973
## 80.0 256.5369 0.6919723 214.6500
## 90.0 257.0270 0.6891059 214.3080
## 100.0 257.5661 0.6863851 214.3654
##
## Tuning parameter 'alpha' was held constant at a value of 0
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 0 and lambda = 50.
# method = "glmnet" here, not "lmStepAIC"
Best coefficients (at the selected lambda)
coef(rmod$finalModel, rmod$bestTune$lambda)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 874.162322
## M 77.500286
## So 90.905740
## Ed 99.746419
## Po1 143.068832
## Po2 110.327142
## LF 23.541896
## M.F 69.456316
## Pop 2.194690
## NW 33.911995
## U1 -45.181541
## U2 80.767577
## Wealth 35.970781
## Ineq 132.686907
## Prob -85.497550
## Time 4.991453
Best tuning parameters
rmod$bestTune
## alpha lambda
## 24 0 50
plot(rmod$finalModel, xvar="lambda", label=TRUE)
# refit the ridge model at the caret-selected lambda
las <- glmnet(x, y, alpha=0, lambda=50, standardize=FALSE)
coef(las)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 890.730136
## M 79.173098
## So 42.235812
## Ed 97.410662
## Po1 142.184940
## Po2 109.770955
## LF 17.702254
## M.F 70.475181
## Pop 2.404241
## NW 41.450035
## U1 -49.955164
## U2 82.705333
## Wealth 36.977663
## Ineq 137.754151
## Prob -82.329828
## Time 3.790268
# predict(las,x)
Essentially, as lambda grows large (here 50 is selected), the ridge penalty shrinks the coefficients toward zero, though never exactly to zero. This reduction in variance comes at the cost of increased bias.
\[ \text{RSS} + \lambda \sum_{j=1}^p |\beta_j|. \]
Some coefficients can be forced to exactly zero when lambda is sufficiently large.
lmod <- glmnet(x, y, alpha=1, lambda=lgrid, standardize=FALSE)
lmod
##
## Call: glmnet(x = x, y = y, alpha = 1, lambda = lgrid, standardize = FALSE)
##
## Df %Dev Lambda
## 1 1 40.28 100.0
## 2 1 41.61 90.0
## 3 1 42.79 80.0
## 4 3 44.40 70.0
## 5 4 48.12 60.0
## 6 5 54.85 50.0
## 7 5 60.68 40.0
## 8 6 65.37 30.0
## 9 8 71.60 20.0
## 10 10 76.76 10.0
## 11 10 77.25 9.0
## 12 11 77.69 8.0
## 13 11 78.15 7.0
## 14 11 78.55 6.0
## 15 11 78.89 5.0
## 16 11 79.16 4.0
## 17 12 79.39 3.0
## 18 13 79.55 2.0
## 19 15 79.80 1.0
## 20 15 79.88 0.9
## 21 15 79.95 0.8
## 22 15 80.01 0.7
## 23 15 80.07 0.6
## 24 15 80.12 0.5
## 25 15 80.16 0.4
## 26 15 80.19 0.3
## 27 15 80.21 0.2
## 28 15 80.23 0.1
## 29 15 80.24 0.0
# glmnet stores the lambda path from largest to smallest, so find the column for lambda = 10
g <- which(rev(lgrid) == 10)
coef(lmod)[, g]
## (Intercept) M So Ed Po1 Po2
## 905.070218 90.059221 0.000000 129.547911 306.323406 0.000000
## LF M.F Pop NW U1 U2
## 0.000000 53.324806 0.000000 12.220954 -35.163512 69.807741
## Wealth Ineq Prob Time
## 4.605232 193.579685 -77.984628 0.000000
Coefficients from cv.glmnet
lasso_model <- cv.glmnet(x, y, alpha = 1)
coefficients(lasso_model)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 905.02218
## M 41.87200
## So .
## Ed .
## Po1 274.52562
## Po2 .
## LF .
## M.F 50.18259
## Pop .
## NW .
## U1 .
## U2 .
## Wealth .
## Ineq 73.44748
## Prob -47.83284
## Time .
Plot MSE
plot(lasso_model)
log(10)  # natural log of lambda = 10, for locating it on the plot's log-lambda axis
## [1] 2.302585
Plot coefficients
plot(coef(lasso_model))
# total 16 variables
Here, at the default s (lambda.1se), the lasso shrinks ten of the fifteen predictors exactly to zero, leaving five in the model (M, Po1, M.F, Ineq, Prob).
The surviving coefficients are regularized (shrunken) estimates, not ordinary least-squares estimates.
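A quick check of how many predictors remain non-zero at the default s:
# count non-zero predictor coefficients at the default s = "lambda.1se" (intercept excluded)
sum(coef(lasso_model)[-1, 1] != 0)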
predict(lasso_model, newx = x[1:5,], s = "lambda.min")
## lambda.min
## [1,] 738.5723
## [2,] 1371.4215
## [3,] 405.3442
## [4,] 1778.4943
## [5,] 1150.2672
#
# predict(lasso_model, newx = x[1:5,], s = "lambda.1se")
# compute the prediction for observation 2 by hand with the default-s coefficients: coef %*% c(1, features)
B <- matrix(coefficients(lasso_model), 1, 16)
A <- matrix(c(1, x[2, ]), 16, 1)
B %*% A
## [,1]
## [1,] 1173.148
Lasso with caret (alpha = 1, searching lambda over the grid)
rmod_lasso <- train(Crime~., data = crime, method = 'glmnet',
tuneGrid = expand.grid(alpha=1, lambda=lgrid), trControl = set_train)
rmod_lasso
## glmnet
##
## 47 samples
## 15 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 42, 43, 42, 44, 43, 41, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.0 276.3308 0.5332602 206.8758
## 0.1 276.1340 0.5334472 206.8759
## 0.2 275.7119 0.5342752 207.0146
## 0.3 275.3985 0.5346291 207.2103
## 0.4 274.9290 0.5351602 207.2740
## 0.5 274.2746 0.5362311 207.1324
## 0.6 273.7096 0.5368691 207.0515
## 0.7 273.2021 0.5371730 207.1742
## 0.8 272.6339 0.5379234 207.4054
## 0.9 272.0800 0.5387381 208.0376
## 1.0 271.5368 0.5397225 208.6470
## 2.0 266.8659 0.5532462 209.4660
## 3.0 263.4551 0.5635557 208.1290
## 4.0 260.3451 0.5730960 206.6475
## 5.0 257.6395 0.5798266 205.4691
## 6.0 255.3381 0.5842742 204.7120
## 7.0 253.2581 0.5875854 203.8114
## 8.0 251.3008 0.5917263 203.0562
## 9.0 249.4835 0.5957328 202.4439
## 10.0 248.2197 0.5989190 202.0392
## 20.0 243.4194 0.6135743 202.2524
## 30.0 246.7950 0.6179306 208.2652
## 40.0 251.9887 0.6272594 211.7687
## 50.0 261.0851 0.6206761 216.0071
## 60.0 272.1425 0.5950684 224.8250
## 70.0 277.3382 0.5825096 230.7380
## 80.0 278.8386 0.5791517 232.3339
## 90.0 279.6290 0.5794788 232.9768
## 100.0 280.8392 0.5809463 233.6135
##
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 20.
Plot the coefficient paths against lambda
plot(rmod_lasso$finalModel, xvar="lambda", label=TRUE)
One drawback: when predictors are strongly correlated with each other, the lasso tends to pick one of them somewhat arbitrarily and eliminate the others.
Cross-validation (for stronger generalization) is how we choose the estimated best model with its optimal hyper-parameter values (e.g. lambda). The same process carries over to different types of algorithms such as ridge, lasso, elastic net, random forests, and boosted trees.
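As a sketch of swapping in a different learner with the same resampling setup (the random forest method and the mtry grid below are illustrative, and assume the randomForest package is installed):
# same trainControl object as before; only the method and its tuning grid change
rf_mod <- train(Crime ~ ., data = crime, method = "rf",
                tuneGrid = expand.grid(mtry = c(3, 5, 7)),
                trControl = set_train)
rf_mod$bestTune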
\[ \text{RSS} + \lambda \left( \alpha \sum_{j=1}^p |\beta_j| + (1-\alpha) \sum_{j=1}^p \beta_j^2 \right). \] Search alpha and lambda using cross-validation.
emod <- train(Crime~., data = crime, method = 'glmnet',
tuneGrid = expand.grid(alpha=seq(0, 1, by=0.1), lambda=lgrid), trControl = set_train)
emod
## glmnet
##
## 47 samples
## 15 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 42, 42, 43, 41, 41, 43, ...
## Resampling results across tuning parameters:
##
## alpha lambda RMSE Rsquared MAE
## 0.0 0.0 267.9867 0.7103081 217.6191
## 0.0 0.1 267.9867 0.7103081 217.6191
## 0.0 0.2 267.9867 0.7103081 217.6191
## 0.0 0.3 267.9867 0.7103081 217.6191
## 0.0 0.4 267.9867 0.7103081 217.6191
## 0.0 0.5 267.9867 0.7103081 217.6191
## 0.0 0.6 267.9867 0.7103081 217.6191
## 0.0 0.7 267.9867 0.7103081 217.6191
## 0.0 0.8 267.9867 0.7103081 217.6191
## 0.0 0.9 267.9867 0.7103081 217.6191
## 0.0 1.0 267.9867 0.7103081 217.6191
## 0.0 2.0 267.9867 0.7103081 217.6191
## 0.0 3.0 267.9867 0.7103081 217.6191
## 0.0 4.0 267.9867 0.7103081 217.6191
## 0.0 5.0 267.9867 0.7103081 217.6191
## 0.0 6.0 267.9867 0.7103081 217.6191
## 0.0 7.0 267.9867 0.7103081 217.6191
## 0.0 8.0 267.9867 0.7103081 217.6191
## 0.0 9.0 267.9867 0.7103081 217.6191
## 0.0 10.0 267.9867 0.7103081 217.6191
## 0.0 20.0 267.9867 0.7103081 217.6191
## 0.0 30.0 267.8207 0.7156791 217.8669
## 0.0 40.0 266.3735 0.7214246 217.1630
## 0.0 50.0 265.4512 0.7241696 217.2528
## 0.0 60.0 264.9578 0.7254418 217.5198
## 0.0 70.0 264.7475 0.7259260 217.7840
## 0.0 80.0 264.7332 0.7259267 218.0579
## 0.0 90.0 264.8753 0.7256124 218.3422
## 0.0 100.0 265.1214 0.7250537 218.8859
## 0.1 0.0 282.2145 0.6381494 224.3573
## 0.1 0.1 282.2145 0.6381494 224.3573
## 0.1 0.2 282.2145 0.6381494 224.3573
## 0.1 0.3 282.1224 0.6391422 224.3197
## 0.1 0.4 282.1348 0.6407200 224.4275
## 0.1 0.5 282.1468 0.6421246 224.6028
## 0.1 0.6 282.1520 0.6434051 224.8223
## 0.1 0.7 282.1432 0.6444584 225.0153
## 0.1 0.8 282.0984 0.6454335 225.1497
## 0.1 0.9 282.0040 0.6464008 225.2248
## 0.1 1.0 281.9073 0.6472505 225.2893
## 0.1 2.0 280.8271 0.6552424 225.5293
## 0.1 3.0 279.5050 0.6633663 224.8137
## 0.1 4.0 278.4694 0.6687272 224.3343
## 0.1 5.0 277.5655 0.6730923 224.0276
## 0.1 6.0 276.8250 0.6775106 223.6016
## 0.1 7.0 275.9645 0.6821456 223.0950
## 0.1 8.0 275.2401 0.6841830 222.7255
## 0.1 9.0 274.5865 0.6855753 222.4779
## 0.1 10.0 273.9281 0.6878141 222.1889
## 0.1 20.0 270.6819 0.7055926 220.6449
## 0.1 30.0 268.5494 0.7161526 219.1607
## 0.1 40.0 267.7081 0.7203113 218.8990
## 0.1 50.0 267.8287 0.7201634 219.4387
## 0.1 60.0 268.3943 0.7183583 220.5547
## 0.1 70.0 269.1727 0.7160210 221.6380
## 0.1 80.0 270.1477 0.7132266 222.6706
## 0.1 90.0 271.2655 0.7104941 223.6433
## 0.1 100.0 272.4833 0.7086027 224.5389
## 0.2 0.0 282.1715 0.6358431 224.1210
## 0.2 0.1 282.1715 0.6358431 224.1210
## 0.2 0.2 282.1376 0.6374559 224.2191
## 0.2 0.3 282.1703 0.6393600 224.4021
## 0.2 0.4 282.2028 0.6410499 224.5303
## 0.2 0.5 282.2311 0.6425895 224.7203
## 0.2 0.6 282.2439 0.6438261 224.9644
## 0.2 0.7 282.1770 0.6449047 225.1145
## 0.2 0.8 282.0708 0.6460316 225.2090
## 0.2 0.9 281.9587 0.6470258 225.2869
## 0.2 1.0 281.8092 0.6479373 225.3197
## 0.2 2.0 279.9909 0.6582548 224.9866
## 0.2 3.0 278.1909 0.6696328 223.8878
## 0.2 4.0 276.8695 0.6765556 223.3703
## 0.2 5.0 275.9494 0.6814504 223.0067
## 0.2 6.0 275.2315 0.6836590 222.6379
## 0.2 7.0 274.5998 0.6845719 222.3209
## 0.2 8.0 273.8714 0.6867008 222.0976
## 0.2 9.0 273.2440 0.6892718 221.8749
## 0.2 10.0 272.7557 0.6931899 221.7684
## 0.2 20.0 271.0862 0.7033115 221.3824
## 0.2 30.0 270.2693 0.7111838 221.4095
## 0.2 40.0 270.6792 0.7119936 222.2011
## 0.2 50.0 271.6117 0.7101775 222.9428
## 0.2 60.0 273.2194 0.7074163 223.9470
## 0.2 70.0 274.5612 0.7050320 224.6504
## 0.2 80.0 276.1277 0.7030020 225.4571
## 0.2 90.0 278.1574 0.7017226 226.6392
## 0.2 100.0 280.0406 0.7035427 227.6860
## 0.3 0.0 282.2111 0.6347904 224.0798
## 0.3 0.1 282.1409 0.6353096 224.0371
## 0.3 0.2 282.1650 0.6375557 224.2621
## 0.3 0.3 282.2258 0.6395780 224.4841
## 0.3 0.4 282.2816 0.6413630 224.6421
## 0.3 0.5 282.3259 0.6429038 224.8473
## 0.3 0.6 282.2749 0.6440762 225.0471
## 0.3 0.7 282.1584 0.6453093 225.1707
## 0.3 0.8 282.0399 0.6464900 225.2715
## 0.3 0.9 281.8700 0.6475334 225.3115
## 0.3 1.0 281.7280 0.6484494 225.3384
## 0.3 2.0 279.1822 0.6621204 224.3808
## 0.3 3.0 276.7985 0.6754202 223.0144
## 0.3 4.0 275.3082 0.6841230 222.4181
## 0.3 5.0 274.4350 0.6864008 222.0307
## 0.3 6.0 273.8678 0.6858433 221.8313
## 0.3 7.0 273.3288 0.6868808 221.6479
## 0.3 8.0 273.0673 0.6893341 221.7953
## 0.3 9.0 272.7577 0.6933337 221.9222
## 0.3 10.0 272.4726 0.6954661 221.9573
## 0.3 20.0 271.6896 0.7018986 222.0729
## 0.3 30.0 272.2899 0.7039463 223.5997
## 0.3 40.0 273.4156 0.7041095 224.7305
## 0.3 50.0 274.4283 0.7036317 225.1282
## 0.3 60.0 276.1833 0.7027524 225.6540
## 0.3 70.0 278.0034 0.7071001 226.4272
## 0.3 80.0 280.0004 0.7127841 227.1358
## 0.3 90.0 281.9071 0.7162638 228.2799
## 0.3 100.0 283.5251 0.7127113 229.1827
## 0.4 0.0 282.2133 0.6341863 224.0631
## 0.4 0.1 282.0922 0.6354579 224.0102
## 0.4 0.2 282.1889 0.6376835 224.3088
## 0.4 0.3 282.2850 0.6397667 224.5678
## 0.4 0.4 282.3678 0.6416413 224.7567
## 0.4 0.5 282.3941 0.6430558 224.9514
## 0.4 0.6 282.2849 0.6442898 225.1203
## 0.4 0.7 282.1773 0.6456601 225.2630
## 0.4 0.8 282.0065 0.6468283 225.3302
## 0.4 0.9 281.8374 0.6479136 225.3622
## 0.4 1.0 281.5767 0.6492087 225.3141
## 0.4 2.0 278.2798 0.6666606 223.7085
## 0.4 3.0 275.3971 0.6817248 222.1967
## 0.4 4.0 273.9149 0.6890896 221.5464
## 0.4 5.0 273.0703 0.6892025 221.1311
## 0.4 6.0 272.7160 0.6886944 221.0829
## 0.4 7.0 272.6513 0.6900556 221.4198
## 0.4 8.0 272.6270 0.6925159 221.8509
## 0.4 9.0 272.6177 0.6933039 222.1393
## 0.4 10.0 272.5153 0.6939565 222.2794
## 0.4 20.0 272.4065 0.6997167 223.4248
## 0.4 30.0 273.5408 0.6963566 224.7493
## 0.4 40.0 274.2136 0.6983301 225.1071
## 0.4 50.0 275.9446 0.7046946 225.6669
## 0.4 60.0 277.7905 0.7170586 226.0794
## 0.4 70.0 279.3552 0.7267813 226.7581
## 0.4 80.0 281.1939 0.7198099 227.8106
## 0.4 90.0 283.3367 0.7009370 228.6101
## 0.4 100.0 286.9290 0.6761310 230.5859
## 0.5 0.0 282.1819 0.6342825 224.0429
## 0.5 0.1 282.0815 0.6355018 224.0118
## 0.5 0.2 282.2152 0.6377713 224.3506
## 0.5 0.3 282.3510 0.6399289 224.6539
## 0.5 0.4 282.4572 0.6418021 224.8755
## 0.5 0.5 282.4075 0.6431488 225.0082
## 0.5 0.6 282.2975 0.6444902 225.1922
## 0.5 0.7 282.1556 0.6458766 225.3162
## 0.5 0.8 282.0012 0.6470005 225.3845
## 0.5 0.9 281.6926 0.6485579 225.3219
## 0.5 1.0 281.3257 0.6500658 225.2216
## 0.5 2.0 277.2843 0.6708940 223.0100
## 0.5 3.0 274.0469 0.6881730 221.4460
## 0.5 4.0 272.7308 0.6906541 220.8041
## 0.5 5.0 272.0349 0.6913020 220.4818
## 0.5 6.0 271.8956 0.6923228 220.6697
## 0.5 7.0 272.0208 0.6939745 221.2184
## 0.5 8.0 272.1519 0.6935425 221.7167
## 0.5 9.0 272.2858 0.6934733 222.0993
## 0.5 10.0 272.4462 0.6935347 222.4538
## 0.5 20.0 273.2216 0.6951982 224.3380
## 0.5 30.0 273.2244 0.6973132 224.5852
## 0.5 40.0 274.7790 0.7066753 225.2829
## 0.5 50.0 276.3396 0.7258486 225.0316
## 0.5 60.0 277.9630 0.7344463 226.0282
## 0.5 70.0 280.2847 0.7168362 226.8101
## 0.5 80.0 284.5511 0.6869677 229.1545
## 0.5 90.0 289.9508 0.6547420 233.0739
## 0.5 100.0 294.5685 0.6246517 238.8836
## 0.6 0.0 282.1431 0.6343766 224.0265
## 0.6 0.1 282.0886 0.6355442 224.0385
## 0.6 0.2 282.2577 0.6378656 224.4162
## 0.6 0.3 282.4295 0.6400801 224.7531
## 0.6 0.4 282.5313 0.6418114 224.9805
## 0.6 0.5 282.4312 0.6431595 225.0811
## 0.6 0.6 282.2942 0.6446206 225.2513
## 0.6 0.7 282.1274 0.6459980 225.3575
## 0.6 0.8 281.8631 0.6474967 225.3431
## 0.6 0.9 281.4653 0.6492499 225.2336
## 0.6 1.0 281.0580 0.6508027 225.1154
## 0.6 2.0 276.2856 0.6752832 222.3581
## 0.6 3.0 273.0574 0.6907393 220.9222
## 0.6 4.0 271.7132 0.6921854 220.2030
## 0.6 5.0 271.2641 0.6921596 220.1132
## 0.6 6.0 271.2592 0.6949398 220.4005
## 0.6 7.0 271.2323 0.6972491 220.8486
## 0.6 8.0 271.4355 0.6964189 221.3622
## 0.6 9.0 271.7356 0.6957327 221.8224
## 0.6 10.0 271.9332 0.6956937 222.1180
## 0.6 20.0 272.9930 0.6911846 224.3147
## 0.6 30.0 272.9654 0.7036591 224.4051
## 0.6 40.0 274.5592 0.7262200 224.0504
## 0.6 50.0 276.2095 0.7409007 225.1183
## 0.6 60.0 279.2107 0.7188902 226.1692
## 0.6 70.0 284.8965 0.6815513 229.3150
## 0.6 80.0 291.1848 0.6438233 234.9170
## 0.6 90.0 296.3457 0.6104644 241.8193
## 0.6 100.0 299.7811 0.5837349 246.8190
## 0.7 0.0 282.0879 0.6345417 223.9842
## 0.7 0.1 282.0762 0.6356119 224.0417
## 0.7 0.2 282.2890 0.6379596 224.4649
## 0.7 0.3 282.5110 0.6402135 224.8515
## 0.7 0.4 282.5628 0.6418125 225.0510
## 0.7 0.5 282.4248 0.6431599 225.1288
## 0.7 0.6 282.2875 0.6447062 225.3079
## 0.7 0.7 282.1092 0.6461021 225.3752
## 0.7 0.8 281.6845 0.6480724 225.2641
## 0.7 0.9 281.2485 0.6498339 225.1460
## 0.7 1.0 280.8039 0.6514819 225.0074
## 0.7 2.0 275.3303 0.6798943 221.8077
## 0.7 3.0 272.2020 0.6919032 220.4328
## 0.7 4.0 270.8384 0.6932404 219.6473
## 0.7 5.0 270.4660 0.6946781 219.7713
## 0.7 6.0 270.3502 0.6978148 219.9802
## 0.7 7.0 270.5003 0.6980292 220.5235
## 0.7 8.0 270.7211 0.6983400 220.9875
## 0.7 9.0 270.8302 0.6995239 221.3082
## 0.7 10.0 270.9102 0.7009279 221.5001
## 0.7 20.0 271.4986 0.6980374 223.3104
## 0.7 30.0 272.3223 0.7190536 223.4102
## 0.7 40.0 274.1656 0.7446365 224.0054
## 0.7 50.0 276.9240 0.7301240 224.9414
## 0.7 60.0 283.1706 0.6873359 228.1967
## 0.7 70.0 291.0570 0.6417600 234.9601
## 0.7 80.0 297.1981 0.6015790 243.1483
## 0.7 90.0 300.5143 0.5747440 248.3586
## 0.7 100.0 301.2913 0.5687831 249.5659
## 0.8 0.0 282.0973 0.6347159 224.0086
## 0.8 0.1 282.1281 0.6356518 224.1027
## 0.8 0.2 282.3807 0.6380462 224.5674
## 0.8 0.3 282.6432 0.6402688 224.9941
## 0.8 0.4 282.5855 0.6417264 225.1223
## 0.8 0.5 282.4274 0.6430902 225.1862
## 0.8 0.6 282.3035 0.6446492 225.3713
## 0.8 0.7 281.9509 0.6465242 225.3209
## 0.8 0.8 281.5044 0.6485400 225.2027
## 0.8 0.9 281.0384 0.6503006 225.0732
## 0.8 1.0 280.4992 0.6522192 224.8614
## 0.8 2.0 274.4354 0.6842914 221.3403
## 0.8 3.0 271.5667 0.6928877 220.0940
## 0.8 4.0 270.2746 0.6937432 219.4377
## 0.8 5.0 269.7822 0.6977966 219.4571
## 0.8 6.0 269.5774 0.6987853 219.6310
## 0.8 7.0 269.7350 0.6989139 220.1391
## 0.8 8.0 269.7719 0.7001315 220.4751
## 0.8 9.0 269.8069 0.7014826 220.6955
## 0.8 10.0 269.9167 0.7028482 220.8461
## 0.8 20.0 268.9844 0.7131807 221.5146
## 0.8 30.0 270.7614 0.7400896 221.6690
## 0.8 40.0 273.6388 0.7483917 223.5784
## 0.8 50.0 279.0800 0.7047302 225.7499
## 0.8 60.0 288.5628 0.6507826 232.2597
## 0.8 70.0 296.4135 0.6017975 242.5245
## 0.8 80.0 299.8963 0.5718992 248.0992
## 0.8 90.0 300.5801 0.5679415 249.2550
## 0.8 100.0 301.1049 0.5772718 249.7694
## 0.9 0.0 282.1169 0.6349852 224.0361
## 0.9 0.1 282.1870 0.6357768 224.1702
## 0.9 0.2 282.3960 0.6387656 224.6021
## 0.9 0.3 282.5990 0.6410238 225.0103
## 0.9 0.4 282.5033 0.6423842 225.1237
## 0.9 0.5 282.3827 0.6438671 225.2027
## 0.9 0.6 282.2830 0.6452661 225.3743
## 0.9 0.7 281.7964 0.6475537 225.2540
## 0.9 0.8 281.2983 0.6495271 225.1220
## 0.9 0.9 280.7482 0.6515044 224.9364
## 0.9 1.0 280.0819 0.6540955 224.5850
## 0.9 2.0 273.7097 0.6880828 220.9935
## 0.9 3.0 270.9559 0.6936249 219.7917
## 0.9 4.0 269.9743 0.6942203 219.4381
## 0.9 5.0 269.4666 0.6980420 219.4327
## 0.9 6.0 269.3323 0.6987100 219.6750
## 0.9 7.0 269.1620 0.7003166 219.8913
## 0.9 8.0 268.9986 0.7022015 220.0335
## 0.9 9.0 268.7435 0.7043960 219.9006
## 0.9 10.0 268.5708 0.7053651 220.0640
## 0.9 20.0 266.4047 0.7287805 219.2322
## 0.9 30.0 268.8879 0.7610655 220.3233
## 0.9 40.0 272.3394 0.7385722 221.9030
## 0.9 50.0 282.2570 0.6766966 227.2315
## 0.9 60.0 292.8818 0.6188706 238.8280
## 0.9 70.0 298.0289 0.5767929 246.4017
## 0.9 80.0 298.9339 0.5685706 248.0023
## 0.9 90.0 299.4043 0.5793482 248.5027
## 0.9 100.0 300.9650 0.5821633 249.3379
## 1.0 0.0 282.1218 0.6351366 224.0638
## 1.0 0.1 282.2365 0.6357921 224.2307
## 1.0 0.2 282.4633 0.6388121 224.6763
## 1.0 0.3 282.7293 0.6407147 225.1157
## 1.0 0.4 282.6099 0.6420099 225.2215
## 1.0 0.5 282.5119 0.6435425 225.3078
## 1.0 0.6 282.2310 0.6454610 225.3668
## 1.0 0.7 281.7450 0.6477398 225.2490
## 1.0 0.8 281.2139 0.6496842 225.1118
## 1.0 0.9 280.5532 0.6521246 224.8188
## 1.0 1.0 279.7518 0.6560431 224.3219
## 1.0 2.0 273.2333 0.6897743 220.7984
## 1.0 3.0 270.6088 0.6939710 219.6254
## 1.0 4.0 269.7394 0.6955182 219.4417
## 1.0 5.0 269.3804 0.6978890 219.5128
## 1.0 6.0 269.2274 0.6988690 219.7504
## 1.0 7.0 268.9401 0.7007383 219.9600
## 1.0 8.0 268.6688 0.7026703 219.8303
## 1.0 9.0 268.1249 0.7048013 219.6412
## 1.0 10.0 268.1023 0.7047159 219.7496
## 1.0 20.0 264.5474 0.7426244 217.4148
## 1.0 30.0 267.2228 0.7694693 219.1212
## 1.0 40.0 273.1833 0.7167680 221.7460
## 1.0 50.0 287.0382 0.6491259 232.1134
## 1.0 60.0 295.8886 0.5932294 243.6584
## 1.0 70.0 298.3317 0.5677060 247.3982
## 1.0 80.0 298.2032 0.5781592 247.5677
## 1.0 90.0 299.6228 0.5823179 248.3463
## 1.0 100.0 302.1193 0.5829069 249.8504
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 20.
Caret selects alpha = 1 and lambda = 20. Below, the model is refit directly with glmnet (alpha = 1, lambda fixed at 6).
enmod <- glmnet(x, y, alpha=1, lambda=6, standardize=FALSE)
coef(enmod)
## 16 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) 905.079515
## M 100.431328
## So .
## Ed 155.209834
## Po1 298.018275
## Po2 .
## LF .
## M.F 53.586868
## Pop -8.510894
## NW 16.008402
## U1 -58.997719
## U2 99.590072
## Wealth 36.407901
## Ineq 229.890906
## Prob -81.810625
## Time .
Recursive feature elimination (RFE)
set.seed(100)
options(warn=-1)
subsets <- c(1:14) # try subset sizes from 1 to 14 features to find the optimal number
ctrl <- rfeControl(functions = rfFuncs, # random forest functions
                   method = "repeatedcv", # 10-fold cross-validation repeated 5 times
repeats = 5,
verbose = FALSE, number=10)
lmProfile <- rfe(x=x, y=y,
sizes = subsets,
rfeControl = ctrl)
lmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 5 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 355.0 0.4138 285.0 146.2 0.3441 125.81
## 2 324.7 0.4370 257.0 122.1 0.3421 102.12
## 3 316.0 0.4978 247.3 109.2 0.3275 90.60
## 4 281.7 0.6169 215.3 114.6 0.3154 92.13
## 5 279.2 0.6124 210.4 107.2 0.3061 87.87
## 6 281.3 0.6075 210.6 112.7 0.3143 91.02
## 7 278.6 0.5839 208.5 107.7 0.3083 92.45
## 8 273.9 0.5993 207.2 105.7 0.3099 90.31
## 9 273.8 0.6160 207.3 109.0 0.3073 93.25
## 10 272.1 0.6129 208.0 106.3 0.3055 89.73
## 11 270.7 0.6225 207.7 105.0 0.3057 88.60
## 12 272.7 0.6174 207.7 106.7 0.3090 90.27
## 13 269.0 0.6181 206.6 103.6 0.3071 87.61
## 14 268.1 0.6299 205.8 102.1 0.3111 86.75 *
## 15 271.4 0.6268 208.4 105.5 0.3121 89.13
##
## The top 5 variables (out of 14):
## Po1, Po2, NW, Prob, Wealth
RFE recommends 14 features for the model.
predictors(lmProfile)
## [1] "Po1" "Po2" "NW" "Prob" "Wealth" "Ed" "LF" "M"
## [9] "Ineq" "Time" "Pop" "So" "U2" "M.F"
Plot performance (RMSE) across subset sizes
plot(lmProfile)
Variable importance
varimp <- varImp(lmProfile)
varimp_data <- data.frame(feature = row.names(varimp)[1:15],
                          importance = varimp[1:15, 1])
dotchart(varimp_data$importance,label=varimp_data$feature, main="Variable Importance")
ggplot(data = varimp_data,
aes(x = reorder(feature, -importance), y = importance, fill = feature)) +
geom_bar(stat="identity") + labs(x = "Features", y = "Variable Importance") +
geom_text(aes(label = round(importance, 2)), vjust=1.6, color="white", size=4) +
theme_bw() + theme(legend.position = "none")
# Post prediction
# postResample(predict(lmProfile, x_test), y_test)
This demonstrates the ability of RFE to find important features and eliminate less relevant ones.
ATA and RFE may produce less desirable results if:
- the sample size is small;
- non-random missingness is present in the response data set;
- there are problematic items (e.g., items with low discrimination or high guessing).
See 2.1: Machine learning workflow.
Available algorithms in caret
paste(names(getModelInfo()), collapse=', ')
## [1] "ada, AdaBag, AdaBoost.M1, adaboost, amdai, ANFIS, avNNet, awnb, awtan, bag, bagEarth, bagEarthGCV, bagFDA, bagFDAGCV, bam, bartMachine, bayesglm, binda, blackboost, blasso, blassoAveraged, bridge, brnn, BstLm, bstSm, bstTree, C5.0, C5.0Cost, C5.0Rules, C5.0Tree, cforest, chaid, CSimca, ctree, ctree2, cubist, dda, deepboost, DENFIS, dnn, dwdLinear, dwdPoly, dwdRadial, earth, elm, enet, evtree, extraTrees, fda, FH.GBML, FIR.DM, foba, FRBCS.CHI, FRBCS.W, FS.HGD, gam, gamboost, gamLoess, gamSpline, gaussprLinear, gaussprPoly, gaussprRadial, gbm_h2o, gbm, gcvEarth, GFS.FR.MOGUL, GFS.LT.RS, GFS.THRIFT, glm.nb, glm, glmboost, glmnet_h2o, glmnet, glmStepAIC, gpls, hda, hdda, hdrda, HYFIS, icr, J48, JRip, kernelpls, kknn, knn, krlsPoly, krlsRadial, lars, lars2, lasso, lda, lda2, leapBackward, leapForward, leapSeq, Linda, lm, lmStepAIC, LMT, loclda, logicBag, LogitBoost, logreg, lssvmLinear, lssvmPoly, lssvmRadial, lvq, M5, M5Rules, manb, mda, Mlda, mlp, mlpKerasDecay, mlpKerasDecayCost, mlpKerasDropout, mlpKerasDropoutCost, mlpML, mlpSGD, mlpWeightDecay, mlpWeightDecayML, monmlp, msaenet, multinom, mxnet, mxnetAdam, naive_bayes, nb, nbDiscrete, nbSearch, neuralnet, nnet, nnls, nodeHarvest, null, OneR, ordinalNet, ordinalRF, ORFlog, ORFpls, ORFridge, ORFsvm, ownn, pam, parRF, PART, partDSA, pcaNNet, pcr, pda, pda2, penalized, PenalizedLDA, plr, pls, plsRglm, polr, ppr, pre, PRIM, protoclass, qda, QdaCov, qrf, qrnn, randomGLM, ranger, rbf, rbfDDA, Rborist, rda, regLogistic, relaxo, rf, rFerns, RFlda, rfRules, ridge, rlda, rlm, rmda, rocc, rotationForest, rotationForestCp, rpart, rpart1SE, rpart2, rpartCost, rpartScore, rqlasso, rqnc, RRF, RRFglobal, rrlda, RSimca, rvmLinear, rvmPoly, rvmRadial, SBC, sda, sdwd, simpls, SLAVE, slda, smda, snn, sparseLDA, spikeslab, spls, stepLDA, stepQDA, superpc, svmBoundrangeString, svmExpoString, svmLinear, svmLinear2, svmLinear3, svmLinearWeights, svmLinearWeights2, svmPoly, svmRadial, svmRadialCost, svmRadialSigma, svmRadialWeights, svmSpectrumString, tan, tanSearch, treebag, vbmpRadial, vglmAdjCat, vglmContRatio, vglmCumulative, widekernelpls, WM, wsrf, xgbDART, xgbLinear, xgbTree, xyf"
set.seed(100)
options(warn=-1)
subsets <- c(1:14) # try subset sizes from 1 to 14 features to find the optimal number
ctrl <- rfeControl(functions = lmFuncs, # linear model functions
                   method = "repeatedcv", # 10-fold cross-validation repeated 5 times
repeats = 5,
verbose = FALSE, number=10)
lmProfile <- rfe(x=x, y=y,
sizes = subsets,
rfeControl = ctrl)
lmProfile
##
## Recursive feature selection
##
## Outer resampling method: Cross-Validated (10 fold, repeated 5 times)
##
## Resampling performance over subset size:
##
## Variables RMSE Rsquared MAE RMSESD RsquaredSD MAESD Selected
## 1 278.6 0.5873 227.9 105.32 0.3291 92.37
## 2 278.3 0.5704 227.5 104.63 0.3464 90.66
## 3 258.1 0.5788 210.3 100.59 0.3482 83.59
## 4 236.7 0.6251 194.2 111.30 0.3201 94.65 *
## 5 241.7 0.6296 202.8 105.32 0.3129 89.74
## 6 246.3 0.6179 203.8 99.47 0.3235 85.47
## 7 250.0 0.6096 207.9 99.28 0.3037 86.60
## 8 247.0 0.6357 206.0 98.03 0.2959 83.29
## 9 241.7 0.6222 200.3 97.63 0.3049 81.10
## 10 243.2 0.6431 203.0 100.97 0.2917 85.88
## 11 249.8 0.6331 207.9 102.79 0.2962 88.87
## 12 255.3 0.6027 211.7 97.89 0.3019 84.03
## 13 258.1 0.6088 213.8 98.64 0.3076 84.67
## 14 257.9 0.6093 213.6 98.90 0.2985 85.20
## 15 256.5 0.6110 213.0 99.24 0.2981 85.30
##
## The top 4 variables (out of 4):
## Po1, Po2, Ineq, Ed
With linear-model functions (lmFuncs), only four features are selected: Po1, Po2, Ineq, and Ed.
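As with the random-forest run, performance across subset sizes can be plotted:
# RMSE across subset sizes for the lmFuncs run
plot(lmProfile)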