Read in the data

Note that GENDER = 1 corresponds to Male and GENDER = 0 corresponds to Female.

# Turn the 0/1 GENDER indicator into a labelled factor.  Labels are attached
# to the sorted unique values, so the lower code becomes "Female" and the
# higher code "Male".
ES[["GENDER"]] <- factor(ES[["GENDER"]], labels = c("Female", "Male"))
# Show the recoded data as an interactive table.
DT::datatable(ES)

Consider the model fit from Example 4.20.

# Regress LNSAL on every other column of ES except the first one, which is
# dropped by position (presumably an ID column -- confirm against the data).
mod420 <- lm(LNSAL ~ ., data = ES[, -c(1)])
# Print the coefficient table and overall fit statistics.
summary(mod420)

Call:
lm(formula = LNSAL ~ ., data = ES[, -c(1)])

Residuals:
      Min        1Q    Median        3Q       Max 
-0.163466 -0.048971 -0.001111  0.041345  0.124534 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.862e+00  9.703e-02 101.634  < 2e-16 ***
EXP          4.364e-02  3.761e-03  11.604  < 2e-16 ***
EDUC         3.094e-02  2.950e-03  10.487  < 2e-16 ***
GENDERMale   1.166e-01  3.696e-02   3.155  0.00217 ** 
NUMSUP       3.259e-04  7.850e-05   4.152 7.36e-05 ***
ASSETS       2.391e-03  4.439e-04   5.386 5.49e-07 ***
EXPSQ       -6.347e-04  1.384e-04  -4.588 1.41e-05 ***
GEN_SUP      3.020e-04  9.239e-05   3.269  0.00152 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.06596 on 92 degrees of freedom
Multiple R-squared:  0.9401,    Adjusted R-squared:  0.9355 
F-statistic: 206.3 on 7 and 92 DF,  p-value: < 2.2e-16
# The same model written with an explicit list of predictors instead of the
# `.` shorthand; the summary should reproduce the fit of the `LNSAL ~ .` model.
mod420B <- lm(LNSAL ~ EXP + EDUC + GENDER + NUMSUP + ASSETS + EXPSQ + GEN_SUP, data = ES)
summary(mod420B)

Call:
lm(formula = LNSAL ~ EXP + EDUC + GENDER + NUMSUP + ASSETS + 
    EXPSQ + GEN_SUP, data = ES)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.163466 -0.048971 -0.001111  0.041345  0.124534 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.862e+00  9.703e-02 101.634  < 2e-16 ***
EXP          4.364e-02  3.761e-03  11.604  < 2e-16 ***
EDUC         3.094e-02  2.950e-03  10.487  < 2e-16 ***
GENDERMale   1.166e-01  3.696e-02   3.155  0.00217 ** 
NUMSUP       3.259e-04  7.850e-05   4.152 7.36e-05 ***
ASSETS       2.391e-03  4.439e-04   5.386 5.49e-07 ***
EXPSQ       -6.347e-04  1.384e-04  -4.588 1.41e-05 ***
GEN_SUP      3.020e-04  9.239e-05   3.269  0.00152 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.06596 on 92 degrees of freedom
Multiple R-squared:  0.9401,    Adjusted R-squared:  0.9355 
F-statistic: 206.3 on 7 and 92 DF,  p-value: < 2.2e-16

Slightly augmented data

# Load the augmented data set; the first row of the file holds column names.
ES2 <- read.table(file = "EXECSAL2.txt", header = TRUE)
# Table of the data exactly as read, before any renaming or recoding.
DT::datatable(ES2)
# Replace the column names positionally with the names used in the models.
names(ES2) <- c(
  "ID", "LogSal", "EXP", "EDUC", "GENDER", "NUMSUP",
  "ASSETS", "BOARD", "AGE", "PROFITS", "IEXP", "TSALES"
)
# Recode the indicator columns as labelled factors (labels follow the sorted
# unique values of each column).
ES2[["GENDER"]] <- factor(ES2[["GENDER"]], labels = c("Female", "Male"))
ES2[c("BOARD", "IEXP")] <- lapply(
  ES2[c("BOARD", "IEXP")],
  factor,
  labels = c("No", "Yes")
)
# Table of the cleaned data, without the row-name column.
DT::datatable(ES2, rownames = FALSE)

Forward Stepwise Regression

# Intercept-only model: the starting point for forward selection.  The first
# column of ES2 is dropped by position before fitting.
FM <- lm(LogSal ~ 1, data = ES2[, -c(1)])
summary(FM)

Call:
lm(formula = LogSal ~ 1, data = ES2[, -c(1)])

Residuals:
     Min       1Q   Median       3Q      Max 
-0.79072 -0.17304  0.00768  0.15298  0.60838 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 11.45502    0.02598   440.9   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.2598 on 99 degrees of freedom
# Forward selection starting from FM.  The single-formula scope lists the
# candidate predictors (per ?step it is taken as the upper model); terms are
# added one at a time while doing so lowers AIC.  The selected model is only
# printed here, not stored.
step(FM, scope = LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS + BOARD + AGE + PROFITS + IEXP + TSALES, direction = "forward")
Start:  AIC=-268.57
LogSal ~ 1

          Df Sum of Sq    RSS     AIC
+ EXP      1    4.1364 2.5462 -363.06
+ AGE      1    2.6488 4.0338 -317.05
+ GENDER   1    1.0492 5.6335 -283.64
+ EDUC     1    0.3264 6.3563 -271.57
+ NUMSUP   1    0.2897 6.3930 -271.00
+ ASSETS   1    0.2774 6.4052 -270.81
<none>                 6.6827 -268.57
+ TSALES   1    0.0201 6.6625 -266.87
+ PROFITS  1    0.0181 6.6646 -266.84
+ BOARD    1    0.0169 6.6657 -266.82
+ IEXP     1    0.0002 6.6824 -266.57

Step:  AIC=-363.06
LogSal ~ EXP

          Df Sum of Sq    RSS     AIC
+ GENDER   1   0.87027 1.6760 -402.88
+ EDUC     1   0.32522 2.2210 -374.72
+ NUMSUP   1   0.31253 2.2337 -374.15
+ ASSETS   1   0.26811 2.2781 -372.18
<none>                 2.5462 -363.06
+ BOARD    1   0.04591 2.5003 -362.87
+ TSALES   1   0.04132 2.5049 -362.69
+ PROFITS  1   0.01466 2.5316 -361.63
+ AGE      1   0.00843 2.5378 -361.39
+ IEXP     1   0.00381 2.5424 -361.21

Step:  AIC=-402.88
LogSal ~ EXP + GENDER

          Df Sum of Sq    RSS     AIC
+ NUMSUP   1   0.60068 1.0753 -445.26
+ EDUC     1   0.28150 1.3945 -419.27
+ ASSETS   1   0.19195 1.4840 -413.04
+ BOARD    1   0.10205 1.5739 -407.16
<none>                 1.6760 -402.88
+ PROFITS  1   0.00735 1.6686 -401.32
+ TSALES   1   0.00137 1.6746 -400.96
+ IEXP     1   0.00022 1.6757 -400.89
+ AGE      1   0.00000 1.6760 -400.88

Step:  AIC=-445.26
LogSal ~ EXP + GENDER + NUMSUP

          Df Sum of Sq     RSS     AIC
+ EDUC     1   0.45697 0.61832 -498.59
+ ASSETS   1   0.11593 0.95936 -454.67
+ BOARD    1   0.02841 1.04688 -445.94
<none>                 1.07529 -445.26
+ AGE      1   0.00623 1.06906 -443.84
+ PROFITS  1   0.00622 1.06907 -443.84
+ TSALES   1   0.00044 1.07485 -443.30
+ IEXP     1   0.00003 1.07526 -443.26

Step:  AIC=-498.59
LogSal ~ EXP + GENDER + NUMSUP + EDUC

          Df Sum of Sq     RSS     AIC
+ ASSETS   1  0.087902 0.53041 -511.93
<none>                 0.61832 -498.59
+ BOARD    1  0.009688 0.60863 -498.17
+ IEXP     1  0.002451 0.61587 -496.99
+ PROFITS  1  0.001376 0.61694 -496.82
+ AGE      1  0.000343 0.61797 -496.65
+ TSALES   1  0.000000 0.61832 -496.59

Step:  AIC=-511.93
LogSal ~ EXP + GENDER + NUMSUP + EDUC + ASSETS

          Df Sum of Sq     RSS     AIC
<none>                 0.53041 -511.93
+ IEXP     1 0.0092875 0.52113 -511.69
+ BOARD    1 0.0037568 0.52666 -510.64
+ TSALES   1 0.0003588 0.53006 -509.99
+ PROFITS  1 0.0002463 0.53017 -509.97
+ AGE      1 0.0000122 0.53040 -509.93

Call:
lm(formula = LogSal ~ EXP + GENDER + NUMSUP + EDUC + ASSETS, 
    data = ES2[, -c(1)])

Coefficients:
(Intercept)          EXP   GENDERMale       NUMSUP         EDUC  
  9.9619345    0.0272762    0.2246932    0.0005244    0.0290921  
     ASSETS  
  0.0019623  

Backward Stepwise Regression

# Full model with every remaining column of ES2 (first column dropped by
# position) as a predictor: the starting point for backward elimination.
# NOTE: this reassigns FM, replacing any model previously stored under that name.
FM <- lm(LogSal ~ ., data = ES2[, -c(1)])
summary(FM)

Call:
lm(formula = LogSal ~ ., data = ES2[, -c(1)])

Residuals:
      Min        1Q    Median        3Q       Max 
-0.201770 -0.050464  0.004435  0.046826  0.185952 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.002e+01  1.481e-01  67.692  < 2e-16 ***
EXP          2.792e-02  1.773e-03  15.745  < 2e-16 ***
EDUC         2.903e-02  3.426e-03   8.475 4.57e-13 ***
GENDERMale   2.243e-01  1.708e-02  13.135  < 2e-16 ***
NUMSUP       5.140e-04  4.922e-05  10.443  < 2e-16 ***
ASSETS       2.048e-03  5.250e-04   3.901 0.000186 ***
BOARDYes    -1.538e-02  1.686e-02  -0.912 0.364124    
AGE         -5.097e-04  1.438e-03  -0.355 0.723795    
PROFITS     -2.633e-03  5.128e-03  -0.513 0.608896    
IEXPYes     -2.656e-02  2.037e-02  -1.304 0.195613    
TSALES      -9.774e-04  2.959e-03  -0.330 0.741955    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.07608 on 89 degrees of freedom
Multiple R-squared:  0.9229,    Adjusted R-squared:  0.9142 
F-statistic: 106.5 on 10 and 89 DF,  p-value: < 2.2e-16
# Backward elimination starting from the full model FM: terms are dropped one
# at a time while doing so lowers AIC.  The selected model is only printed
# here, not stored.
step(FM, scope = LogSal ~ ., direction = "backward")
Start:  AIC=-504.84
LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS + BOARD + AGE + 
    PROFITS + IEXP + TSALES

          Df Sum of Sq     RSS     AIC
- TSALES   1   0.00063 0.51583 -506.71
- AGE      1   0.00073 0.51593 -506.70
- PROFITS  1   0.00153 0.51673 -506.54
- BOARD    1   0.00482 0.52002 -505.91
- IEXP     1   0.00984 0.52504 -504.94
<none>                 0.51520 -504.84
- ASSETS   1   0.08810 0.60330 -491.05
- EDUC     1   0.41581 0.93102 -447.66
- NUMSUP   1   0.63133 1.14653 -426.84
- GENDER   1   0.99872 1.51393 -399.05
- EXP      1   1.43512 1.95032 -373.72

Step:  AIC=-506.71
LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS + BOARD + AGE + 
    PROFITS + IEXP

          Df Sum of Sq     RSS     AIC
- AGE      1   0.00050 0.51633 -508.62
- PROFITS  1   0.00149 0.51732 -508.43
- BOARD    1   0.00448 0.52031 -507.85
- IEXP     1   0.00992 0.52575 -506.81
<none>                 0.51583 -506.71
- ASSETS   1   0.08769 0.60352 -493.01
- EDUC     1   0.41593 0.93176 -449.59
- NUMSUP   1   0.63878 1.15461 -428.14
- GENDER   1   1.03375 1.54959 -398.72
- EXP      1   1.52826 2.04409 -371.02

Step:  AIC=-508.62
LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS + BOARD + PROFITS + 
    IEXP

          Df Sum of Sq    RSS     AIC
- PROFITS  1    0.0015 0.5178 -510.33
- BOARD    1    0.0040 0.5203 -509.85
- IEXP     1    0.0096 0.5260 -508.77
<none>                 0.5163 -508.62
- ASSETS   1    0.0898 0.6061 -494.58
- EDUC     1    0.4243 0.9406 -450.64
- NUMSUP   1    0.6384 1.1547 -430.13
- GENDER   1    1.0503 1.5666 -399.62
- EXP      1    3.9764 4.4927 -294.27

Step:  AIC=-510.33
LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS + BOARD + IEXP

         Df Sum of Sq    RSS     AIC
- BOARD   1    0.0033 0.5211 -511.69
- IEXP    1    0.0089 0.5267 -510.64
<none>                0.5178 -510.33
- ASSETS  1    0.0885 0.6064 -496.55
- EDUC    1    0.4230 0.9408 -452.62
- NUMSUP  1    0.6420 1.1598 -431.69
- GENDER  1    1.0490 1.5668 -401.61
- EXP     1    3.9749 4.4927 -296.27

Step:  AIC=-511.69
LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS + IEXP

         Df Sum of Sq    RSS     AIC
- IEXP    1    0.0093 0.5304 -511.93
<none>                0.5211 -511.69
- ASSETS  1    0.0947 0.6159 -496.99
- EDUC    1    0.4347 0.9558 -453.04
- NUMSUP  1    0.6868 1.2079 -429.63
- GENDER  1    1.0466 1.5677 -403.55
- EXP     1    3.9718 4.4929 -298.27

Step:  AIC=-511.93
LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS

         Df Sum of Sq    RSS     AIC
<none>                0.5304 -511.93
- ASSETS  1    0.0879 0.6183 -498.59
- EDUC    1    0.4289 0.9594 -454.67
- NUMSUP  1    0.6908 1.2212 -430.53
- GENDER  1    1.0656 1.5961 -403.76
- EXP     1    3.9627 4.4932 -300.26

Call:
lm(formula = LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS, 
    data = ES2[, -c(1)])

Coefficients:
(Intercept)          EXP         EDUC   GENDERMale       NUMSUP  
  9.9619345    0.0272762    0.0290921    0.2246932    0.0005244  
     ASSETS  
  0.0019623  

Using leaps

# Subset selection with leaps::regsubsets.  nvmax = 10 allows subsets up to
# 10 variables, i.e. all candidate predictors once the first column of ES2
# has been dropped.
library(leaps)
ans <- regsubsets(LogSal ~ ., data = ES2[, -c(1)], nvmax = 10)
# Print which variables enter the reported subset of each size.
summary(ans)
Subset selection object
Call: regsubsets.formula(LogSal ~ ., data = ES2[, -c(1)], nvmax = 10)
10 Variables  (and intercept)
           Forced in Forced out
EXP            FALSE      FALSE
EDUC           FALSE      FALSE
GENDERMale     FALSE      FALSE
NUMSUP         FALSE      FALSE
ASSETS         FALSE      FALSE
BOARDYes       FALSE      FALSE
AGE            FALSE      FALSE
PROFITS        FALSE      FALSE
IEXPYes        FALSE      FALSE
TSALES         FALSE      FALSE
1 subsets of each size up to 10
Selection Algorithm: exhaustive
          EXP EDUC GENDERMale NUMSUP ASSETS BOARDYes AGE PROFITS IEXPYes
1  ( 1 )  "*" " "  " "        " "    " "    " "      " " " "     " "    
2  ( 1 )  "*" " "  "*"        " "    " "    " "      " " " "     " "    
3  ( 1 )  "*" " "  "*"        "*"    " "    " "      " " " "     " "    
4  ( 1 )  "*" "*"  "*"        "*"    " "    " "      " " " "     " "    
5  ( 1 )  "*" "*"  "*"        "*"    "*"    " "      " " " "     " "    
6  ( 1 )  "*" "*"  "*"        "*"    "*"    " "      " " " "     "*"    
7  ( 1 )  "*" "*"  "*"        "*"    "*"    "*"      " " " "     "*"    
8  ( 1 )  "*" "*"  "*"        "*"    "*"    "*"      " " "*"     "*"    
9  ( 1 )  "*" "*"  "*"        "*"    "*"    "*"      "*" "*"     "*"    
10  ( 1 ) "*" "*"  "*"        "*"    "*"    "*"      "*" "*"     "*"    
          TSALES
1  ( 1 )  " "   
2  ( 1 )  " "   
3  ( 1 )  " "   
4  ( 1 )  " "   
5  ( 1 )  " "   
6  ( 1 )  " "   
7  ( 1 )  " "   
8  ( 1 )  " "   
9  ( 1 )  " "   
10  ( 1 ) "*"   
# Draw the regsubsets criterion plots (Cp, adjusted R^2, R^2, BIC) on a single
# 2 x 2 page.  par() returns the settings it replaces, so they are saved and
# restored afterwards rather than forcing the layout back to 1 x 1, which
# would clobber whatever layout was active before this block ran.
old_par <- par(mfrow = c(2, 2))
for (criterion in c("Cp", "adjr2", "r2", "bic")) {
  plot(ans, scale = criterion)
}

par(old_par)

All together now.

# Collect the selection criteria from the regsubsets summary into one data
# frame, one row per subset size.  The summary is evaluated once and its
# components are picked out by name.
DF <- with(
  summary(ans),
  data.frame(R2 = rsq, Cp = cp, BIC = bic, R2adj = adjr2)
)
# Interactive table of the criteria, rounded to four decimals.
DT::datatable(round(DF, 4))

Cross Validation

See http://robjhyndman.com/hyndsight/crossvalidation/ and http://stat-ata-asu.github.io/STT3851ClassRepo/Rmarkdown/Cross-ValidationInClassHO.pdf

Graphs

library(car)
Warning in stats::runif(1L): '.Random.seed' is not an integer vector but of
type 'NULL', so ignored
# Final model: the five predictors retained by both the forward and the
# backward stepwise searches, refit on the complete ES2 data frame.
finmod <- lm(LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS, data = ES2)
summary(finmod)

Call:
lm(formula = LogSal ~ EXP + EDUC + GENDER + NUMSUP + ASSETS, 
    data = ES2)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.201219 -0.056016 -0.003581  0.053656  0.187251 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 9.9619345  0.1010567  98.578  < 2e-16 ***
EXP         0.0272762  0.0010293  26.501  < 2e-16 ***
EDUC        0.0290921  0.0033367   8.719 9.71e-14 ***
GENDERMale  0.2246932  0.0163503  13.742  < 2e-16 ***
NUMSUP      0.0005244  0.0000474  11.064  < 2e-16 ***
ASSETS      0.0019623  0.0004972   3.947 0.000153 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.07512 on 94 degrees of freedom
Multiple R-squared:  0.9206,    Adjusted R-squared:  0.9164 
F-statistic: 218.1 on 5 and 94 DF,  p-value: < 2.2e-16
# Residual plots for finmod (car); also prints a table with a test statistic
# and p-value per term plus a Tukey test.
residualPlots(finmod)

           Test stat Pr(>|t|)
EXP           -4.173    0.000
EDUC          -1.115    0.268
GENDER            NA       NA
NUMSUP         0.227    0.821
ASSETS        -0.384    0.702
Tukey test    -1.622    0.105
# Influence diagnostics for finmod plotted against observation index (car).
influenceIndexPlot(finmod)

# Influence plot for finmod (car); prints the studentized residual, hat value
# and Cook's distance of the observations it flags.
influencePlot(finmod)

     StudRes        Hat     CookD
40 -2.932805 0.09830851 0.3802668
71 -1.004007 0.12818992 0.1571660