# Load the Cars data set.
# The file location can be overridden through the CARS_CSV environment
# variable; when it is unset, the original hard-coded local path is used.
cars_csv <- Sys.getenv(
  "CARS_CSV",
  unset = "E:\\DataScience Yogesh\\R _Codes\\Multilinear Regression\\Cars.csv"
)
# Fail early with a clear message instead of read.csv()'s generic
# "cannot open file" error.
if (!file.exists(cars_csv)) {
  stop("Cars data file not found: ", cars_csv, call. = FALSE)
}
Cars <- read.csv(cars_csv)

# View() opens a data viewer, so it is skipped in non-interactive runs
# (e.g. Rscript), where a viewer may not be available.
if (interactive()) {
  View(Cars)
}

# Put the columns of Cars on the search path. Any formula further down that
# omits `data =` resolves its variables (MPG, VOL, HP, SP, WT) through this.
# NOTE(review): attach() is fragile (a global object with the same name masks
# a column); prefer passing `data = Cars` to every modelling call.
attach(Cars)
# Exploratory Data Analysis(60% of time)
# 1. Measures of Central Tendency
# 2. Measures of Dispersion
# 3. Third moment business decision (skewness)
# 4. Fourth moment business decision (kurtosis)
# 5. Probability distributions of variables
# 6. Graphical representations
  #  > Histogram,Box plot,Dot plot,Stem & Leaf plot, 
  #     Bar plot

# Per-column summary (min, quartiles, median, mean, max) of the data set
summary(object = Cars)
##        HP             MPG             VOL               SP        
##  Min.   : 49.0   Min.   :12.10   Min.   : 50.00   Min.   : 99.56  
##  1st Qu.: 84.0   1st Qu.:27.86   1st Qu.: 89.00   1st Qu.:113.83  
##  Median :100.0   Median :35.15   Median :101.00   Median :118.21  
##  Mean   :117.5   Mean   :34.42   Mean   : 98.77   Mean   :121.54  
##  3rd Qu.:140.0   3rd Qu.:39.53   3rd Qu.:113.00   3rd Qu.:126.40  
##  Max.   :322.0   Max.   :53.70   Max.   :160.00   Max.   :169.60  
##        WT       
##  Min.   :15.71  
##  1st Qu.:29.59  
##  Median :32.73  
##  Mean   :32.41  
##  3rd Qu.:37.39  
##  Max.   :53.00
# 7. Scatter-plot matrix of every pair of variables, to eyeball the
#    relationship between the output (MPG) and the predictors.
# dev.new() opens the platform's default graphics device (windows() exists
# only on Windows); noRStudioGD = TRUE asks for a standalone device rather
# than the RStudio plot pane.
dev.new(noRStudioGD = TRUE)
plot(Cars)

# 8. Correlation coefficient matrix - strength & direction of correlation
cor(Cars)
##              HP        MPG         VOL         SP          WT
## HP   1.00000000 -0.7250383  0.07745947  0.9738481  0.07651307
## MPG -0.72503835  1.0000000 -0.52905658 -0.6871246 -0.52675909
## VOL  0.07745947 -0.5290566  1.00000000  0.1021700  0.99920308
## SP   0.97384807 -0.6871246  0.10217001  1.0000000  0.10243919
## WT   0.07651307 -0.5267591  0.99920308  0.1024392  1.00000000
# The linear model of interest: MPG regressed on all four predictors.
# `data = Cars` names the source of the variables explicitly instead of
# depending on whatever happens to be on the search path, and matches how
# the later refits (model.car1, model.car2) are specified.
m1 <- lm(MPG ~ VOL + HP + SP + WT, data = Cars)

summary(m1)
## 
## Call:
## lm(formula = MPG ~ VOL + HP + SP + WT, data = Cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6320 -2.9944 -0.3705  2.2149 15.6179 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 30.67734   14.90030   2.059   0.0429 *  
## VOL         -0.33605    0.56864  -0.591   0.5563    
## HP          -0.20544    0.03922  -5.239  1.4e-06 ***
## SP           0.39563    0.15826   2.500   0.0146 *  
## WT           0.40057    1.69346   0.237   0.8136    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.488 on 76 degrees of freedom
## Multiple R-squared:  0.7705, Adjusted R-squared:  0.7585 
## F-statistic:  63.8 on 4 and 76 DF,  p-value: < 2.2e-16
# Prediction based on Volume only.
# `data = Cars` makes the model independent of the search path.
model.carV <- lm(MPG ~ VOL, data = Cars)
summary(model.carV) # VOL is significant when it is the only predictor
## 
## Call:
## lm(formula = MPG ~ VOL, data = Cars)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.3074  -5.2026   0.1902   5.4536  17.1632 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 55.81709    3.95696  14.106  < 2e-16 ***
## VOL         -0.21662    0.03909  -5.541 3.82e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.798 on 79 degrees of freedom
## Multiple R-squared:  0.2799, Adjusted R-squared:  0.2708 
## F-statistic: 30.71 on 1 and 79 DF,  p-value: 3.823e-07
# Prediction based on Weight only.
# `data = Cars` makes the model independent of the search path.
model.carW <- lm(MPG ~ WT, data = Cars)
summary(model.carW) # WT is significant when it is the only predictor
## 
## Call:
## lm(formula = MPG ~ WT, data = Cars)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.3933  -5.4377   0.2738   5.2951  16.9351 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  55.2296     3.8761  14.249  < 2e-16 ***
## WT           -0.6420     0.1165  -5.508 4.38e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.811 on 79 degrees of freedom
## Multiple R-squared:  0.2775, Adjusted R-squared:  0.2683 
## F-statistic: 30.34 on 1 and 79 DF,  p-value: 4.383e-07
# Prediction based on Volume and Weight together.
# `data = Cars` makes the model independent of the search path.
model.carVW <- lm(MPG ~ VOL + WT, data = Cars)
# Both become insignificant when entered together: VOL and WT are almost
# perfectly correlated (r = 0.999 in the correlation matrix above).
summary(model.carVW)
## 
## Call:
## lm(formula = MPG ~ VOL + WT, data = Cars)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.9939  -4.9460   0.0028   5.3905  17.6972 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  56.8847     4.5342   12.55   <2e-16 ***
## VOL          -0.6983     0.9841   -0.71    0.480    
## WT            1.4349     2.9291    0.49    0.626    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.835 on 78 degrees of freedom
## Multiple R-squared:  0.2821, Adjusted R-squared:  0.2637 
## F-statistic: 15.33 on 2 and 78 DF,  p-value: 2.434e-06
# Removing a few influential observations is cheaper than removing an entire
# predictor column, so look for such observations first.
# Deletion diagnostics for every observation in m1: DFBETAS, DFFITS,
# covariance ratio, Cook's distance and hat values.
influence.measures(model = m1)
## Influence measures of
##   lm(formula = MPG ~ VOL + HP + SP + WT) :
## 
##       dfb.1_  dfb.VOL    dfb.HP    dfb.SP    dfb.WT    dffit cov.r
## 1   0.027438  0.34491 -0.064490 -1.06e-02 -0.348732  0.56724 0.774
## 2   0.130255 -0.22541  0.035945 -8.57e-02  0.224097  0.37945 0.913
## 3   0.090410 -0.03970  0.010673 -6.06e-02  0.038239  0.30826 0.896
## 4  -0.050842 -0.12604 -0.086513  7.38e-02  0.124908  0.19016 1.103
## 5   0.086150  0.17799  0.014157 -7.17e-02 -0.179572  0.39154 0.874
## 6  -0.038227 -0.09931 -0.068912  5.82e-02  0.097995  0.15449 1.108
## 7   0.106859 -0.11741  0.021234 -7.10e-02  0.116017  0.32649 0.901
## 8  -0.065348 -0.01492 -0.044642  5.30e-02  0.017852 -0.11238 1.193
## 9  -0.093081  0.05912 -0.062806  7.25e-02 -0.055744 -0.14161 1.213
## 10 -0.031771 -0.00857 -0.041975  3.74e-02  0.008142  0.06104 1.095
## 11  0.010849 -0.05995 -0.018542  5.55e-03  0.058726  0.12152 1.064
## 12  0.185184 -0.24251  0.269479 -2.60e-01  0.264240 -0.67399 0.909
## 13  0.029117  0.01302  0.032757 -3.20e-02 -0.012894 -0.04051 1.157
## 14  0.011711 -0.06396 -0.017970  4.99e-03  0.062745  0.12343 1.066
## 15  0.054751 -0.07598  0.013861 -3.43e-02  0.074656  0.16218 1.046
## 16  0.024610 -0.12416 -0.009612 -3.19e-03  0.122996  0.16212 1.092
## 17 -0.035743  0.04293 -0.043967  3.84e-02 -0.043618  0.07590 1.103
## 18  0.109967  0.11288  0.222667 -2.14e-01 -0.091298 -0.63072 0.889
## 19 -0.021167 -0.01180 -0.032416  2.75e-02  0.011223  0.05655 1.091
## 20 -0.034346  0.03309 -0.035729  3.34e-02 -0.033011  0.05350 1.120
## 21  0.005188 -0.00310  0.005631 -5.32e-03  0.003118 -0.00748 1.111
## 22 -0.009172  0.13117 -0.027690  1.91e-03 -0.128786  0.22782 0.997
## 23 -0.017641  0.02021 -0.017551  1.69e-02 -0.020237  0.02750 1.157
## 24 -0.000285 -0.02650 -0.007742  4.68e-03  0.026417  0.03917 1.104
## 25  0.048301 -0.05565  0.052463 -5.01e-02  0.056583 -0.08596 1.116
## 26 -0.082883  0.01903  0.016880 -2.74e-03  0.000512 -0.55124 0.897
## 27 -0.158459  0.14820 -0.151772  1.46e-01 -0.146183  0.22349 1.146
## 28  0.008111  0.00955  0.010392 -9.73e-03 -0.009524 -0.01799 1.117
## 29  0.323026 -0.18454  0.261335 -3.06e-01  0.187890  0.40784 1.073
## 30  0.013519  0.01685  0.015781 -1.53e-02 -0.017023 -0.02991 1.119
## 31 -0.037571  0.20218 -0.035690  1.77e-02 -0.198784  0.25885 1.053
## 32  0.026770 -0.01523  0.015135 -2.32e-02  0.015672  0.07028 1.072
## 33  0.020540 -0.03504  0.020495 -1.95e-02  0.035187 -0.04458 1.122
## 34 -0.030604 -0.04250 -0.035152  3.35e-02  0.043347  0.07496 1.115
## 35  0.010936  0.09112  0.005937 -1.56e-02 -0.090704  0.11893 1.103
## 36 -0.001573  0.00893 -0.002726  1.58e-03 -0.008936  0.01476 1.094
## 37 -0.009721 -0.00385 -0.002428  5.92e-03  0.004414 -0.04023 1.082
## 38 -0.017382 -0.08410 -0.004671  1.08e-02  0.086403 -0.15334 1.034
## 39 -0.083121  0.20544 -0.045772  5.21e-02 -0.203131 -0.24677 1.054
## 40  0.152467 -0.02970  0.151483 -1.63e-01  0.032106 -0.21212 1.019
## 41  0.002327 -0.01530 -0.000434 -1.92e-03  0.015947  0.03409 1.093
## 42 -0.001566  0.00332 -0.000752  1.27e-03 -0.003399 -0.00694 1.092
## 43 -0.039107  0.11655 -0.016950  2.19e-02 -0.115675 -0.14952 1.064
## 44 -0.027999 -0.11046 -0.021059  2.85e-02  0.111714 -0.14919 1.089
## 45 -0.000546 -0.00544 -0.000433  8.74e-04  0.005411 -0.00711 1.114
## 46 -0.009063  0.01889 -0.002513  5.52e-03 -0.018974 -0.04849 1.076
## 47 -0.002202  0.00112 -0.002566  1.93e-03 -0.000935  0.01048 1.088
## 48 -0.002937  0.00350 -0.001218  2.35e-03 -0.003634 -0.01517 1.084
## 49  0.159432 -0.04698  0.162450 -1.76e-01  0.051421 -0.25338 0.982
## 50 -0.101653  0.10473 -0.090453  9.35e-02 -0.105584 -0.16303 1.059
## 51 -0.081377  0.02529 -0.076501  7.95e-02 -0.026046 -0.12191 1.047
## 52 -0.099716  0.09732 -0.089107  9.21e-02 -0.098168 -0.15790 1.057
## 53 -0.002025  0.01874  0.000521 -9.74e-04 -0.018224  0.02530 1.138
## 54  0.153429  0.09921  0.131520 -1.56e-01 -0.100335 -0.23838 1.149
## 55  0.041217  0.00259  0.031761 -3.87e-02 -0.003279 -0.06524 1.102
## 56  0.014599 -0.02015  0.009917 -1.06e-02  0.019391 -0.03490 1.117
## 57 -0.235589  0.03237 -0.211317  2.27e-01 -0.032098 -0.26485 1.049
## 58 -0.048198 -0.11798 -0.054597  5.77e-02  0.117361 -0.16443 1.074
## 59 -0.186548 -0.06523 -0.171879  1.86e-01  0.065393 -0.25265 1.015
## 60 -0.037707 -0.05542 -0.039495  4.82e-02  0.052562 -0.13667 1.058
## 61  0.024059  0.07541  0.018819 -2.64e-02 -0.076628 -0.11635 1.089
## 62 -0.126961 -0.10712 -0.143815  1.49e-01  0.103065 -0.23796 1.069
## 63 -0.015838  0.02280 -0.024985  2.28e-02 -0.025296 -0.08159 1.111
## 64 -0.047739  0.04873 -0.060306  5.62e-02 -0.052219 -0.13562 1.080
## 65 -0.035161  0.06111 -0.045946  3.98e-02 -0.063646 -0.11608 1.097
## 66  0.190254  0.08129  0.189618 -1.71e-01 -0.089976  0.34353 1.169
## 67 -0.036320  0.12250 -0.049478  3.59e-02 -0.124629 -0.16959 1.099
## 68  0.006786 -0.01561  0.000503 -2.83e-03  0.014952 -0.02907 1.128
## 69  0.006291 -0.00920 -0.005653  9.04e-05  0.007847 -0.05551 1.101
## 70 -0.052619 -0.02289 -0.034244  5.05e-02  0.023153  0.09494 1.216
## 71  0.375081 -0.20353  0.225899 -4.17e-01  0.231060 -1.12358 1.033
## 72 -0.016888  0.04110 -0.005558  8.66e-03 -0.039847  0.05801 1.180
## 73 -0.001556  0.00260 -0.000118  6.18e-04 -0.002414  0.00678 1.127
## 74  0.006863 -0.00131  0.011133 -1.10e-02  0.002486  0.03234 1.215
## 75 -0.206329  0.04805 -0.234704  2.32e-01 -0.055962 -0.32060 1.068
## 76 -0.054797  0.05050 -0.076780  6.57e-02 -0.053946 -0.14193 1.124
## 77  0.214209 -0.29653  0.605131 -1.15e-01  0.240034  2.60978 0.431
## 78 -0.001403 -0.00835  0.020701 -4.72e-03  0.009157  0.08386 1.146
## 79  0.328332  0.14545  0.443574 -3.01e-01 -0.167508  0.97032 1.024
## 80 -0.249491 -0.14528 -0.044284  2.05e-01  0.150314  0.79955 1.013
## 81 -0.316601 -0.03288 -0.384383  3.44e-01  0.030357 -0.43138 1.273
##      cook.d    hat inf
## 1  6.05e-02 0.0520   *
## 2  2.80e-02 0.0443    
## 3  1.85e-02 0.0293    
## 4  7.28e-03 0.0643    
## 5  2.96e-02 0.0396    
## 6  4.81e-03 0.0598    
## 7  2.07e-02 0.0330    
## 8  2.56e-03 0.1102    
## 9  4.06e-03 0.1271   *
## 10 7.54e-04 0.0313    
## 11 2.97e-03 0.0288    
## 12 8.73e-02 0.0997    
## 13 3.32e-04 0.0776    
## 14 3.07e-03 0.0298    
## 15 5.27e-03 0.0316    
## 16 5.29e-03 0.0520    
## 17 1.17e-03 0.0402    
## 18 7.63e-02 0.0861    
## 19 6.47e-04 0.0281    
## 20 5.80e-04 0.0496    
## 21 1.13e-05 0.0383    
## 22 1.03e-02 0.0324    
## 23 1.53e-04 0.0771    
## 24 3.11e-04 0.0350    
## 25 1.49e-03 0.0514    
## 26 5.86e-02 0.0731    
## 27 1.01e-02 0.0964    
## 28 6.56e-05 0.0440    
## 29 3.30e-02 0.0991    
## 30 1.81e-04 0.0463    
## 31 1.34e-02 0.0570    
## 32 9.98e-04 0.0197    
## 33 4.03e-04 0.0502    
## 34 1.14e-03 0.0486    
## 35 2.86e-03 0.0488    
## 36 4.41e-05 0.0241    
## 37 3.28e-04 0.0184    
## 38 4.71e-03 0.0259    
## 39 1.22e-02 0.0548    
## 40 8.97e-03 0.0347    
## 41 2.35e-04 0.0255    
## 42 9.76e-06 0.0213    
## 43 4.49e-03 0.0357    
## 44 4.48e-03 0.0478    
## 45 1.03e-05 0.0406    
## 46 4.76e-04 0.0160    
## 47 2.23e-05 0.0183    
## 48 4.66e-05 0.0155    
## 49 1.27e-02 0.0342    
## 50 5.34e-03 0.0368    
## 51 2.99e-03 0.0224    
## 52 5.01e-03 0.0348    
## 53 1.30e-04 0.0618    
## 54 1.14e-02 0.1010    
## 55 8.61e-04 0.0373    
## 56 2.47e-04 0.0448    
## 57 1.40e-02 0.0567    
## 58 5.44e-03 0.0435    
## 59 1.27e-02 0.0426    
## 60 3.76e-03 0.0301    
## 61 2.73e-03 0.0399    
## 62 1.13e-02 0.0587    
## 63 1.35e-03 0.0466    
## 64 3.71e-03 0.0394    
## 65 2.72e-03 0.0445    
## 66 2.37e-02 0.1317    
## 67 5.79e-03 0.0578    
## 68 1.71e-04 0.0533    
## 69 6.24e-04 0.0349    
## 70 1.83e-03 0.1249   *
## 71 2.42e-01 0.2225   *
## 72 6.82e-04 0.0966    
## 73 9.33e-06 0.0524    
## 74 2.12e-04 0.1211   *
## 75 2.05e-02 0.0774    
## 76 4.07e-03 0.0667    
## 77 1.09e+00 0.2514   *
## 78 1.42e-03 0.0730    
## 79 1.81e-01 0.1925   *
## 80 1.24e-01 0.1569   *
## 81 3.73e-02 0.2008   *
library(car)
## Warning: package 'car' was built under R version 3.4.4
## Loading required package: carData
## Warning: package 'carData' was built under R version 3.4.4
## Plotting influence measures
# dev.new() opens the platform's default graphics device (windows() exists
# only on Windows); noRStudioGD = TRUE asks for a standalone device rather
# than the RStudio plot pane.
dev.new(noRStudioGD = TRUE)
influenceIndexPlot(m1) # index plots for influence measures

influencePlot(m1) # a user-friendly summary of the above

##      StudRes        Hat      CookD
## 1   2.421762 0.05200781 0.06047977
## 71 -2.100131 0.22253511 0.24164401
## 77  4.503603 0.25138750 1.08651940
# Refit the full model without row 77, the observation with the largest
# Cook's distance in the influence plot output above.
model.car1 <- lm(
  formula = MPG ~ VOL + SP + HP + WT,
  data = Cars[-77, ]
)
summary(object = model.car1)
## 
## Call:
## lm(formula = MPG ~ VOL + SP + HP + WT, data = Cars[-77, ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3943 -2.3555 -0.5913  1.8978 12.0184 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 27.82675   13.32251   2.089  0.04013 *  
## VOL         -0.18546    0.50895  -0.364  0.71659    
## SP           0.41189    0.14139   2.913  0.00471 ** 
## HP          -0.22664    0.03534  -6.413 1.14e-08 ***
## WT           0.03754    1.51458   0.025  0.98029    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.008 on 75 degrees of freedom
## Multiple R-squared:  0.8192, Adjusted R-squared:  0.8096 
## F-statistic: 84.96 on 4 and 75 DF,  p-value: < 2.2e-16
# Regression after deleting observations 1, 71 and 77 - the three rows
# flagged by influencePlot(m1) above.
model.car2 <- lm(MPG ~ VOL + SP + HP + WT, data = Cars[-c(71, 77, 1), ])
summary(model.car2)
## 
## Call:
## lm(formula = MPG ~ VOL + SP + HP + WT, data = Cars[-c(71, 77, 
##     1), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7300 -2.5391 -0.3696  2.1482 10.7151 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 24.82062   13.01740   1.907  0.06049 .  
## VOL         -0.31823    0.49668  -0.641  0.52372    
## SP           0.44618    0.13881   3.214  0.00195 ** 
## HP          -0.22688    0.03413  -6.647 4.67e-09 ***
## WT           0.40617    1.48045   0.274  0.78459    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.859 on 73 degrees of freedom
## Multiple R-squared:  0.821,  Adjusted R-squared:  0.8112 
## F-statistic: 83.72 on 4 and 73 DF,  p-value: < 2.2e-16
## Variance inflation factors, to check for collinearity between predictors
vif(m1)
##       VOL        HP        SP        WT 
## 638.80608  19.92659  20.00764 639.53382
## A VIF above 10 is a common rule of thumb for problematic collinearity. All
## four predictors exceed it here, with VOL and WT (VIF ~ 639) by far the worst.

## Added-variable plots: the partial relationship between each predictor and
## the output (MPG) after adjusting for the other predictors

# dev.new() opens the platform's default graphics device (windows() exists
# only on Windows); noRStudioGD = TRUE asks for a standalone device rather
# than the RStudio plot pane.
dev.new(noRStudioGD = TRUE)
avPlots(m1)

## VOL and WT are nearly collinear, so only one of them should stay in the
## model; the final model below keeps WT and drops VOL.
## NOTE(review): the original note named "wt" as the variable to delete, but
## the fitted final model keeps WT and drops VOL - confirm which was intended.

## Final model: VOL removed, WT kept alongside SP and HP.
# `data = Cars` makes the model independent of the search path.
finalmodel <- lm(MPG ~ WT + SP + HP, data = Cars)
summary(finalmodel)
## 
## Call:
## lm(formula = MPG ~ WT + SP + HP, data = Cars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.7567 -2.7652 -0.3683  1.8589 15.7690 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.78481   14.49047   1.986   0.0505 .  
## WT          -0.59941    0.06739  -8.895 1.91e-13 ***
## SP           0.40775    0.15626   2.609   0.0109 *  
## HP          -0.20850    0.03871  -5.386 7.56e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.469 on 77 degrees of freedom
## Multiple R-squared:  0.7695, Adjusted R-squared:  0.7605 
## F-statistic: 85.68 on 3 and 77 DF,  p-value: < 2.2e-16