Visualizing the Relationships in the Data
# Preview the first six rows of the built-in mtcars data set.
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Scatterplot matrix of every pair of mtcars columns.
pairs(x = mtcars, gap = 0.5)

##### Backward Elimination Process

# Starting point for backward elimination: regress mpg on all ten other
# mtcars columns, then inspect the per-coefficient p-values.
car.lm <- lm(
  mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb,
  data = mtcars
)
summary(car.lm)
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + 
##     am + gear + carb, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4506 -1.6044 -0.1196  1.2193  4.6271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 12.30337   18.71788   0.657   0.5181  
## cyl         -0.11144    1.04502  -0.107   0.9161  
## disp         0.01334    0.01786   0.747   0.4635  
## hp          -0.02148    0.02177  -0.987   0.3350  
## drat         0.78711    1.63537   0.481   0.6353  
## wt          -3.71530    1.89441  -1.961   0.0633 .
## qsec         0.82104    0.73084   1.123   0.2739  
## vs           0.31776    2.10451   0.151   0.8814  
## am           2.52023    2.05665   1.225   0.2340  
## gear         0.65541    1.49326   0.439   0.6652  
## carb        -0.19942    0.82875  -0.241   0.8122  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared:  0.869,  Adjusted R-squared:  0.8066 
## F-statistic: 13.93 on 10 and 21 DF,  p-value: 3.793e-07
# Drop cyl: it has the largest p-value (0.9161) in the full model.
# update() re-uses the stored call, so `data` need not be passed again.
car.lm <- update(car.lm, . ~ . - cyl)
summary(car.lm)
## 
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + vs + am + gear + 
##     carb, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4286 -1.5908 -0.0412  1.2120  4.5961 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 10.96007   13.53030   0.810   0.4266  
## disp         0.01283    0.01682   0.763   0.4538  
## hp          -0.02191    0.02091  -1.048   0.3062  
## drat         0.83520    1.53625   0.544   0.5921  
## wt          -3.69251    1.83954  -2.007   0.0572 .
## qsec         0.84244    0.68678   1.227   0.2329  
## vs           0.38975    1.94800   0.200   0.8433  
## am           2.57743    1.94035   1.328   0.1977  
## gear         0.71155    1.36562   0.521   0.6075  
## carb        -0.21958    0.78856  -0.278   0.7833  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.59 on 22 degrees of freedom
## Multiple R-squared:  0.8689, Adjusted R-squared:  0.8153 
## F-statistic: 16.21 on 9 and 22 DF,  p-value: 9.031e-08
# Drop vs: it has the largest p-value (0.8433) in the current model.
# update() re-uses the stored call, so `data` need not be passed again.
car.lm <- update(car.lm, . ~ . - vs)
summary(car.lm)
## 
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + am + gear + 
##     carb, data = mtcars)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -3.356 -1.576 -0.149  1.218  4.604 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  9.76828   11.89230   0.821   0.4199  
## disp         0.01214    0.01612   0.753   0.4590  
## hp          -0.02095    0.01993  -1.051   0.3040  
## drat         0.87510    1.49113   0.587   0.5630  
## wt          -3.71151    1.79834  -2.064   0.0505 .
## qsec         0.91083    0.58312   1.562   0.1319  
## am           2.52390    1.88128   1.342   0.1928  
## gear         0.75984    1.31577   0.577   0.5692  
## carb        -0.24796    0.75933  -0.327   0.7470  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.535 on 23 degrees of freedom
## Multiple R-squared:  0.8687, Adjusted R-squared:  0.823 
## F-statistic: 19.02 on 8 and 23 DF,  p-value: 2.008e-08
# Drop carb: it has the largest p-value (0.7470) in the current model.
# update() re-uses the stored call, so `data` need not be passed again.
car.lm <- update(car.lm, . ~ . - carb)
summary(car.lm)
## 
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + am + gear, 
##     data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1200 -1.7753 -0.1446  1.0903  4.7172 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  9.19763   11.54220   0.797  0.43334   
## disp         0.01552    0.01214   1.278  0.21342   
## hp          -0.02471    0.01596  -1.548  0.13476   
## drat         0.81023    1.45007   0.559  0.58151   
## wt          -4.13065    1.23593  -3.342  0.00272 **
## qsec         1.00979    0.48883   2.066  0.04981 * 
## am           2.58980    1.83528   1.411  0.17104   
## gear         0.60644    1.20596   0.503  0.61964   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.488 on 24 degrees of freedom
## Multiple R-squared:  0.8681, Adjusted R-squared:  0.8296 
## F-statistic: 22.56 on 7 and 24 DF,  p-value: 4.218e-09
# Drop gear: it has the largest p-value (0.61964) in the current model.
# update() re-uses the stored call, so `data` need not be passed again.
car.lm <- update(car.lm, . ~ . - gear)
summary(car.lm)
## 
## Call:
## lm(formula = mpg ~ disp + hp + drat + wt + qsec + am, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2669 -1.6148 -0.2585  1.1220  4.5564 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 10.71062   10.97539   0.976  0.33848   
## disp         0.01310    0.01098   1.193  0.24405   
## hp          -0.02180    0.01465  -1.488  0.14938   
## drat         1.02065    1.36748   0.746  0.46240   
## wt          -4.04454    1.20558  -3.355  0.00254 **
## qsec         0.99073    0.48002   2.064  0.04955 * 
## am           2.98469    1.63382   1.827  0.07969 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.45 on 25 degrees of freedom
## Multiple R-squared:  0.8667, Adjusted R-squared:  0.8347 
## F-statistic: 27.09 on 6 and 25 DF,  p-value: 8.637e-10
# Drop drat: it has the largest p-value (0.46240) in the current model.
# update() re-uses the stored call, so `data` need not be passed again.
car.lm <- update(car.lm, . ~ . - drat)
summary(car.lm)
## 
## Call:
## lm(formula = mpg ~ disp + hp + wt + qsec + am, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5399 -1.7398 -0.3196  1.1676  4.5534 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 14.36190    9.74079   1.474  0.15238   
## disp         0.01124    0.01060   1.060  0.29897   
## hp          -0.02117    0.01450  -1.460  0.15639   
## wt          -4.08433    1.19410  -3.420  0.00208 **
## qsec         1.00690    0.47543   2.118  0.04391 * 
## am           3.47045    1.48578   2.336  0.02749 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.429 on 26 degrees of freedom
## Multiple R-squared:  0.8637, Adjusted R-squared:  0.8375 
## F-statistic: 32.96 on 5 and 26 DF,  p-value: 1.844e-10
# Drop disp: it has the largest p-value (0.29897) in the current model.
# update() re-uses the stored call, so `data` need not be passed again.
car.lm <- update(car.lm, . ~ . - disp)
summary(car.lm)
## 
## Call:
## lm(formula = mpg ~ hp + wt + qsec + am, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4975 -1.5902 -0.1122  1.1795  4.5404 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 17.44019    9.31887   1.871  0.07215 . 
## hp          -0.01765    0.01415  -1.247  0.22309   
## wt          -3.23810    0.88990  -3.639  0.00114 **
## qsec         0.81060    0.43887   1.847  0.07573 . 
## am           2.92550    1.39715   2.094  0.04579 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.435 on 27 degrees of freedom
## Multiple R-squared:  0.8579, Adjusted R-squared:  0.8368 
## F-statistic: 40.74 on 4 and 27 DF,  p-value: 4.589e-11
# Drop hp: it has the largest p-value (0.22309) in the current model.
# update() re-uses the stored call, so `data` need not be passed again.
car.lm <- update(car.lm, . ~ . - hp)
summary(car.lm)
## 
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4811 -1.5555 -0.7257  1.4110  4.6610 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.6178     6.9596   1.382 0.177915    
## wt           -3.9165     0.7112  -5.507 6.95e-06 ***
## qsec          1.2259     0.2887   4.247 0.000216 ***
## am            2.9358     1.4109   2.081 0.046716 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared:  0.8497, Adjusted R-squared:  0.8336 
## F-statistic: 52.75 on 3 and 28 DF,  p-value: 1.21e-11
The final model is $\widehat{\text{mpg}} = 9.6178 - 3.9165\,\text{wt} + 1.2259\,\text{qsec} + 2.9358\,\text{am}$.
Residual Analysis
# Residuals against fitted values for the current car.lm fit.
plot(x = fitted(car.lm), y = resid(car.lm))

# Normal Q-Q plot of the residuals, with the reference line added on top.
qqnorm(y = resid(car.lm))
qqline(y = resid(car.lm))