Stepwise Model Selection

data_set<-read.csv("Advertising.csv")
data_set<-data_set[ , c(2:5)]
data_set
library(caret)
set.seed(1234)
k_flodsTrain<-trainControl(method = "repeatedcv",
                           number = 10,
                           repeats = 5,
                           verboseIter = TRUE)
trainIndexs<-createDataPartition(data_set$TV, p=0.7, list = FALSE)
trainData<-data_set[trainIndexs, ]
testData<-data_set[-trainIndexs, ]
trainData
modelo<-train(Sales ~ TV,
              data = trainData,
              method="lm",
              trControl=k_flodsTrain)
+ Fold01.Rep1: intercept=TRUE 
- Fold01.Rep1: intercept=TRUE 
+ Fold02.Rep1: intercept=TRUE 
- Fold02.Rep1: intercept=TRUE 
+ Fold03.Rep1: intercept=TRUE 
- Fold03.Rep1: intercept=TRUE 
+ Fold04.Rep1: intercept=TRUE 
- Fold04.Rep1: intercept=TRUE 
+ Fold05.Rep1: intercept=TRUE 
- Fold05.Rep1: intercept=TRUE 
+ Fold06.Rep1: intercept=TRUE 
- Fold06.Rep1: intercept=TRUE 
+ Fold07.Rep1: intercept=TRUE 
- Fold07.Rep1: intercept=TRUE 
+ Fold08.Rep1: intercept=TRUE 
- Fold08.Rep1: intercept=TRUE 
+ Fold09.Rep1: intercept=TRUE 
- Fold09.Rep1: intercept=TRUE 
+ Fold10.Rep1: intercept=TRUE 
- Fold10.Rep1: intercept=TRUE 
+ Fold01.Rep2: intercept=TRUE 
- Fold01.Rep2: intercept=TRUE 
+ Fold02.Rep2: intercept=TRUE 
- Fold02.Rep2: intercept=TRUE 
+ Fold03.Rep2: intercept=TRUE 
- Fold03.Rep2: intercept=TRUE 
+ Fold04.Rep2: intercept=TRUE 
- Fold04.Rep2: intercept=TRUE 
+ Fold05.Rep2: intercept=TRUE 
- Fold05.Rep2: intercept=TRUE 
+ Fold06.Rep2: intercept=TRUE 
- Fold06.Rep2: intercept=TRUE 
+ Fold07.Rep2: intercept=TRUE 
- Fold07.Rep2: intercept=TRUE 
+ Fold08.Rep2: intercept=TRUE 
- Fold08.Rep2: intercept=TRUE 
+ Fold09.Rep2: intercept=TRUE 
- Fold09.Rep2: intercept=TRUE 
+ Fold10.Rep2: intercept=TRUE 
- Fold10.Rep2: intercept=TRUE 
+ Fold01.Rep3: intercept=TRUE 
- Fold01.Rep3: intercept=TRUE 
+ Fold02.Rep3: intercept=TRUE 
- Fold02.Rep3: intercept=TRUE 
+ Fold03.Rep3: intercept=TRUE 
- Fold03.Rep3: intercept=TRUE 
+ Fold04.Rep3: intercept=TRUE 
- Fold04.Rep3: intercept=TRUE 
+ Fold05.Rep3: intercept=TRUE 
- Fold05.Rep3: intercept=TRUE 
+ Fold06.Rep3: intercept=TRUE 
- Fold06.Rep3: intercept=TRUE 
+ Fold07.Rep3: intercept=TRUE 
- Fold07.Rep3: intercept=TRUE 
+ Fold08.Rep3: intercept=TRUE 
- Fold08.Rep3: intercept=TRUE 
+ Fold09.Rep3: intercept=TRUE 
- Fold09.Rep3: intercept=TRUE 
+ Fold10.Rep3: intercept=TRUE 
- Fold10.Rep3: intercept=TRUE 
+ Fold01.Rep4: intercept=TRUE 
- Fold01.Rep4: intercept=TRUE 
+ Fold02.Rep4: intercept=TRUE 
- Fold02.Rep4: intercept=TRUE 
+ Fold03.Rep4: intercept=TRUE 
- Fold03.Rep4: intercept=TRUE 
+ Fold04.Rep4: intercept=TRUE 
- Fold04.Rep4: intercept=TRUE 
+ Fold05.Rep4: intercept=TRUE 
- Fold05.Rep4: intercept=TRUE 
+ Fold06.Rep4: intercept=TRUE 
- Fold06.Rep4: intercept=TRUE 
+ Fold07.Rep4: intercept=TRUE 
- Fold07.Rep4: intercept=TRUE 
+ Fold08.Rep4: intercept=TRUE 
- Fold08.Rep4: intercept=TRUE 
+ Fold09.Rep4: intercept=TRUE 
- Fold09.Rep4: intercept=TRUE 
+ Fold10.Rep4: intercept=TRUE 
- Fold10.Rep4: intercept=TRUE 
+ Fold01.Rep5: intercept=TRUE 
- Fold01.Rep5: intercept=TRUE 
+ Fold02.Rep5: intercept=TRUE 
- Fold02.Rep5: intercept=TRUE 
+ Fold03.Rep5: intercept=TRUE 
- Fold03.Rep5: intercept=TRUE 
+ Fold04.Rep5: intercept=TRUE 
- Fold04.Rep5: intercept=TRUE 
+ Fold05.Rep5: intercept=TRUE 
- Fold05.Rep5: intercept=TRUE 
+ Fold06.Rep5: intercept=TRUE 
- Fold06.Rep5: intercept=TRUE 
+ Fold07.Rep5: intercept=TRUE 
- Fold07.Rep5: intercept=TRUE 
+ Fold08.Rep5: intercept=TRUE 
- Fold08.Rep5: intercept=TRUE 
+ Fold09.Rep5: intercept=TRUE 
- Fold09.Rep5: intercept=TRUE 
+ Fold10.Rep5: intercept=TRUE 
- Fold10.Rep5: intercept=TRUE 
Aggregating results
Fitting final model on full training set
modelo
Linear Regression 

140 samples
  1 predictor

No pre-processing
Resampling: Cross-Validated (10 fold, repeated 5 times) 
Summary of sample sizes: 126, 125, 127, 126, 126, 127, ... 
Resampling results:

  RMSE      Rsquared   MAE     
  3.213605  0.6292166  2.565841

Tuning parameter 'intercept' was held constant at a value of TRUE
summary(modelo)

Call:
lm(formula = .outcome ~ ., data = dat)

Residuals:
    Min      1Q  Median      3Q     Max 
-8.2543 -1.8500 -0.1661  2.1735  7.3204 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 6.932358   0.551085   12.58   <2e-16 ***
TV          0.047423   0.003239   14.64   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.266 on 138 degrees of freedom
Multiple R-squared:  0.6083,    Adjusted R-squared:  0.6055 
F-statistic: 214.3 on 1 and 138 DF,  p-value: < 2.2e-16
predict(object = modelo, newdata = testData)
        1         2         8        13        15        23        24        25        27        32        35 
17.844356  9.042675 12.632585  8.061022 16.611361  7.558340 17.758994  9.886802 13.709083 12.286398 11.470725 
       36        37        39        40        43        54        56        58        61        62        63 
20.718180 19.589516  8.976283 17.744768 20.855706 15.591770 16.364763 13.391350  9.469481 19.323948 18.280646 
       66        67        68        70        79        82        83        87        90        92        93 
10.204535  8.426178 13.538361 17.213632  7.188442 18.304357 10.503299 10.550722 12.139387  8.288652 17.256312 
       94        97       100       106       109       116       118       119       125       132       133 
18.830751 16.303113 13.343927 13.471969  7.553598 10.493814 10.555464 12.893410 17.815902 19.508897  7.330710 
      137       138       155       160       164       167       169       170       173       174       177 
 8.146383 19.911992 15.838369 13.177947 14.685994  7.781227 17.147240 20.414674  7.861846 14.918366 18.712194 
      178       181       188       194       198 
15.003727 14.358776 15.994864 14.842489 15.326202 

Forward Selection

null_model<-lm(formula = price ~ 1, data = diamonds)
summary(null_model)

Call:
lm(formula = price ~ 1, data = diamonds)

Residuals:
   Min     1Q Median     3Q    Max 
 -3607  -2983  -1532   1392  14890 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3932.80      17.18     229   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3989 on 53939 degrees of freedom
full_model<-lm(formula = price ~ ., data = diamonds)
summary(full_model)

Call:
lm(formula = price ~ ., data = diamonds)

Residuals:
     Min       1Q   Median       3Q      Max 
-21376.0   -592.4   -183.5    376.4  10694.2 

Coefficients:
             Estimate Std. Error  t value Pr(>|t|)    
(Intercept)  5753.762    396.630   14.507  < 2e-16 ***
carat       11256.978     48.628  231.494  < 2e-16 ***
cut.L         584.457     22.478   26.001  < 2e-16 ***
cut.Q        -301.908     17.994  -16.778  < 2e-16 ***
cut.C         148.035     15.483    9.561  < 2e-16 ***
cut^4         -20.794     12.377   -1.680  0.09294 .  
color.L     -1952.160     17.342 -112.570  < 2e-16 ***
color.Q      -672.054     15.777  -42.597  < 2e-16 ***
color.C      -165.283     14.725  -11.225  < 2e-16 ***
color^4        38.195     13.527    2.824  0.00475 ** 
color^5       -95.793     12.776   -7.498 6.59e-14 ***
color^6       -48.466     11.614   -4.173 3.01e-05 ***
clarity.L    4097.431     30.259  135.414  < 2e-16 ***
clarity.Q   -1925.004     28.227  -68.197  < 2e-16 ***
clarity.C     982.205     24.152   40.668  < 2e-16 ***
clarity^4    -364.918     19.285  -18.922  < 2e-16 ***
clarity^5     233.563     15.752   14.828  < 2e-16 ***
clarity^6       6.883     13.715    0.502  0.61575    
clarity^7      90.640     12.103    7.489 7.06e-14 ***
depth         -63.806      4.535  -14.071  < 2e-16 ***
table         -26.474      2.912   -9.092  < 2e-16 ***
x           -1008.261     32.898  -30.648  < 2e-16 ***
y               9.609     19.333    0.497  0.61918    
z             -50.119     33.486   -1.497  0.13448    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1130 on 53916 degrees of freedom
Multiple R-squared:  0.9198,    Adjusted R-squared:  0.9198 
F-statistic: 2.688e+04 on 23 and 53916 DF,  p-value: < 2.2e-16
forward_selection<-step(
  object = null_model,
  scope = list(lower = null_model, upper = full_model),
  direction = "forward"
)
Start:  AIC=894477.9
price ~ 1

          Df  Sum of Sq        RSS    AIC
+ carat    1 7.2913e+11 1.2935e+11 792389
+ x        1 6.7152e+11 1.8695e+11 812259
+ y        1 6.4296e+11 2.1552e+11 819929
+ z        1 6.3677e+11 2.2170e+11 821454
+ color    6 2.6849e+10 8.3162e+11 892776
+ clarity  7 2.3308e+10 8.3517e+11 893007
+ table    1 1.3876e+10 8.4460e+11 893601
+ cut      4 1.1042e+10 8.4743e+11 893788
+ depth    1 9.7323e+07 8.5838e+11 894474
<none>                  8.5847e+11 894478

Step:  AIC=792389.4
price ~ carat

          Df  Sum of Sq        RSS    AIC
+ clarity  7 3.9082e+10 9.0264e+10 772998
+ color    6 1.2561e+10 1.1678e+11 786891
+ cut      4 6.1332e+09 1.2321e+11 789777
+ x        1 3.5206e+09 1.2583e+11 790903
+ z        1 2.8493e+09 1.2650e+11 791190
+ table    1 1.4377e+09 1.2791e+11 791789
+ y        1 1.2425e+09 1.2810e+11 791871
+ depth    1 1.1546e+09 1.2819e+11 791908
<none>                  1.2935e+11 792389

Step:  AIC=772998.5
price ~ carat + clarity

        Df  Sum of Sq        RSS    AIC
+ color  6 1.6402e+10 7.3862e+10 762193
+ x      1 1.8542e+09 8.8410e+10 771881
+ cut    4 1.7808e+09 8.8483e+10 771932
+ z      1 1.4814e+09 8.8783e+10 772108
+ y      1 7.4127e+08 8.9523e+10 772556
+ table  1 3.7751e+08 8.9886e+10 772774
+ depth  1 3.5822e+08 8.9906e+10 772786
<none>                9.0264e+10 772998

Step:  AIC=762193.4
price ~ carat + clarity + color

        Df  Sum of Sq        RSS    AIC
+ x      1 2733710969 7.1128e+10 760161
+ z      1 1842294631 7.2020e+10 760833
+ cut    4 1699187372 7.2163e+10 760946
+ y      1 1145039064 7.2717e+10 761353
+ table  1  409645878 7.3452e+10 761895
+ depth  1  174658715 7.3687e+10 762068
<none>                7.3862e+10 762193

Step:  AIC=760161.1
price ~ carat + clarity + color + x

        Df  Sum of Sq        RSS    AIC
+ cut    4 1918248123 6.9210e+10 758694
+ depth  1  722282102 7.0406e+10 759613
+ table  1  273738191 7.0855e+10 759955
+ z      1  199547343 7.0929e+10 760012
+ y      1    5354253 7.1123e+10 760159
<none>                7.1128e+10 760161

Step:  AIC=758694.4
price ~ carat + clarity + color + x + cut

        Df Sum of Sq        RSS    AIC
+ depth  1 244682865 6.8965e+10 758505
+ z      1  72666922 6.9137e+10 758640
+ table  1   9935285 6.9200e+10 758689
<none>               6.9210e+10 758694
+ y      1    982101 6.9209e+10 758696

Step:  AIC=758505.4
price ~ carat + clarity + color + x + cut + depth

        Df Sum of Sq        RSS    AIC
+ table  1 105497218 6.8860e+10 758425
<none>               6.8965e+10 758505
+ z      1   2323719 6.8963e+10 758506
+ y      1    298553 6.8965e+10 758507

Step:  AIC=758424.8
price ~ carat + clarity + color + x + cut + depth + table

       Df Sum of Sq        RSS    AIC
+ z     1   2662170 6.8857e+10 758425
<none>              6.8860e+10 758425
+ y     1    116788 6.8860e+10 758427

Step:  AIC=758424.7
price ~ carat + clarity + color + x + cut + depth + table + z

       Df Sum of Sq        RSS    AIC
<none>              6.8857e+10 758425
+ y     1    315487 6.8857e+10 758426
forward_selection

Call:
lm(formula = price ~ carat + clarity + color + x + cut + depth + 
    table + z, data = diamonds)

Coefficients:
(Intercept)        carat    clarity.L    clarity.Q    clarity.C    clarity^4    clarity^5    clarity^6  
   5768.782    11257.752     4097.613    -1925.133      982.322     -364.976      233.635        6.871  
  clarity^7      color.L      color.Q      color.C      color^4      color^5      color^6            x  
     90.622    -1952.179     -672.075     -165.277       38.193      -95.780      -48.452    -1000.354  
      cut.L        cut.Q        cut.C        cut^4        depth        table            z  
    584.600     -302.211      148.446      -20.619      -64.003      -26.501      -47.925  

Backward Elimination

backward_elimination<-step(
  object = full_model,
  scope = list(lower = null_model, upper = full_model),
  direction = "backward"
)
Start:  AIC=758426.5
price ~ carat + cut + color + clarity + depth + table + x + y + 
    z

          Df  Sum of Sq        RSS    AIC
- y        1 3.1549e+05 6.8857e+10 758425
<none>                  6.8857e+10 758426
- z        1 2.8609e+06 6.8860e+10 758427
- table    1 1.0558e+08 6.8962e+10 758507
- depth    1 2.5286e+08 6.9110e+10 758622
- cut      4 8.6357e+08 6.9720e+10 759091
- x        1 1.1996e+09 7.0056e+10 759356
- color    6 1.7082e+10 8.5939e+10 770368
- clarity  7 3.5703e+10 1.0456e+11 780945
- carat    1 6.8440e+10 1.3730e+11 795649

Step:  AIC=758424.7
price ~ carat + cut + color + clarity + depth + table + x + z

          Df  Sum of Sq        RSS    AIC
<none>                  6.8857e+10 758425
- z        1 2.6622e+06 6.8860e+10 758425
- table    1 1.0584e+08 6.8963e+10 758506
- depth    1 2.5637e+08 6.9114e+10 758623
- cut      4 8.6409e+08 6.9721e+10 759089
- x        1 1.5413e+09 7.0398e+10 759617
- color    6 1.7082e+10 8.5940e+10 770366
- clarity  7 3.5708e+10 1.0457e+11 780946
- carat    1 6.8520e+10 1.3738e+11 795679
backward_elimination

Call:
lm(formula = price ~ carat + cut + color + clarity + depth + 
    table + x + z, data = diamonds)

Coefficients:
(Intercept)        carat        cut.L        cut.Q        cut.C        cut^4      color.L      color.Q  
   5768.782    11257.752      584.600     -302.211      148.446      -20.619    -1952.179     -672.075  
    color.C      color^4      color^5      color^6    clarity.L    clarity.Q    clarity.C    clarity^4  
   -165.277       38.193      -95.780      -48.452     4097.613    -1925.133      982.322     -364.976  
  clarity^5    clarity^6    clarity^7        depth        table            x            z  
    233.635        6.871       90.622      -64.003      -26.501    -1000.354      -47.925  
library(ggplot2)
diamonds
LS0tDQp0aXRsZTogIkRhdGEgTWluaW5nICYgQmlnIERhdGEgLSBDbGFzZSA0Ig0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMjIFN0ZXB3aXNlIE1vZGVsIFNlbGVjdGlvbg0KDQpgYGB7cn0NCmRhdGFfc2V0PC1yZWFkLmNzdigiQWR2ZXJ0aXNpbmcuY3N2IikNCmRhdGFfc2V0PC1kYXRhX3NldFsgLCBjKDI6NSldDQpkYXRhX3NldA0KYGBgDQoNCmBgYHtyfQ0KbGlicmFyeShjYXJldCkNCmBgYA0KDQpgYGB7cn0NCnNldC5zZWVkKDEyMzQpDQprX2Zsb2RzVHJhaW48LXRyYWluQ29udHJvbChtZXRob2QgPSAicmVwZWF0ZWRjdiIsDQogICAgICAgICAgICAgICAgICAgICAgICAgICBudW1iZXIgPSAxMCwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgIHJlcGVhdHMgPSA1LA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgdmVyYm9zZUl0ZXIgPSBUUlVFKQ0KYGBgDQoNCg0KYGBge3J9DQp0cmFpbkluZGV4czwtY3JlYXRlRGF0YVBhcnRpdGlvbihkYXRhX3NldCRUViwgcD0wLjcsIGxpc3QgPSBGQUxTRSkNCnRyYWluRGF0YTwtZGF0YV9zZXRbdHJhaW5JbmRleHMsIF0NCnRlc3REYXRhPC1kYXRhX3NldFstdHJhaW5JbmRleHMsIF0NCnRyYWluRGF0YQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWxvPC10cmFpbihTYWxlcyB+IFRWLA0KICAgICAgICAgICAgICBkYXRhID0gdHJhaW5EYXRhLA0KICAgICAgICAgICAgICBtZXRob2Q9ImxtIiwNCiAgICAgICAgICAgICAgdHJDb250cm9sPWtfZmxvZHNUcmFpbikNCm1vZGVsbw0KYGBgDQoNCg0KYGBge3J9DQpzdW1tYXJ5KG1vZGVsbykNCmBgYA0KDQpgYGB7cn0NCnByZWRpY3Qob2JqZWN0ID0gbW9kZWxvLCBuZXdkYXRhID0gdGVzdERhdGEpDQpgYGANCg0KIyMjIEZvcndhcmQgU2VsZWN0aW9uDQoNCg0KYGBge3J9DQpudWxsX21vZGVsPC1sbShmb3JtdWxhID0gcHJpY2UgfiAxLCBkYXRhID0gZGlhbW9uZHMpDQpzdW1tYXJ5KG51bGxfbW9kZWwpDQpgYGANCg0KYGBge3J9DQpmdWxsX21vZGVsPC1sbShmb3JtdWxhID0gcHJpY2UgfiAuLCBkYXRhID0gZGlhbW9uZHMpDQpzdW1tYXJ5KGZ1bGxfbW9kZWwpDQpgYGANCg0KDQpgYGB7cn0NCmZvcndhcmRfc2VsZWN0aW9uPC1zdGVwKA0KICBvYmplY3QgPSBudWxsX21vZGVsLA0KICBzY29wZSA9IGxpc3QobG93ZXIgPSBudWxsX21vZGVsLCB1cHBlciA9IGZ1bGxfbW9kZWwpLA0KICBkaXJlY3Rpb24gPSAiZm9yd2FyZCINCikNCmZvcndhcmRfc2VsZWN0aW9uDQpgYGANCg0KDQojIyMgQmFja3dhcmQgRWxpbWluYXRpb24NCmBgYHtyfQ0KYmFja3dhcmRfZWxpbWluYXRpb248LXN0ZXAoDQogIG9iamVjdCA9IGZ1bGxfbW9kZWwsDQogIHNjb3BlID0gbGlzdChsb3dlciA9IG51bGxfbW9kZWwsIHVwcGVyID0gZnVsbF9tb2RlbCksDQogIGRpcmVjdGlvbiA9ICJiYWNrd2FyZCINCikNCmJhY2t3YXJkX2VsaW1pbmF0aW9uDQpgYGANCg0KDQpgYGB7cn0NCmxpYnJhcnkoZ2dwbG90MikNCmRpYW1vbmRzDQpgYGANCg0K