# Load the Cars data set from its CSV export and print per-column summary
# statistics.
# NOTE(review): the path is an absolute, machine-specific Windows path; the
# script only runs where the file exists at exactly this location.
Cars <- read.csv(file = "E:/Excelr DS/R _Codes/Multilinear Regression/Cars.csv")
# View(Cars)
summary(object = Cars)
## HP MPG VOL SP
## Min. : 49.0 Min. :12.10 Min. : 50.00 Min. : 99.56
## 1st Qu.: 84.0 1st Qu.:27.86 1st Qu.: 89.00 1st Qu.:113.83
## Median :100.0 Median :35.15 Median :101.00 Median :118.21
## Mean :117.5 Mean :34.42 Mean : 98.77 Mean :121.54
## 3rd Qu.:140.0 3rd Qu.:39.53 3rd Qu.:113.00 3rd Qu.:126.40
## Max. :322.0 Max. :53.70 Max. :160.00 Max. :169.60
## WT
## Min. :15.71
## 1st Qu.:29.59
## Median :32.73
## Mean :32.41
## 3rd Qu.:37.39
## Max. :53.00
# Put the columns of Cars on the search path so that formulas written without
# a `data` argument (e.g. `MPG ~ VOL`) can resolve them.
# NOTE(review): attach() is fragile -- any global object that shares a column
# name masks the attached column; passing `data = Cars` to lm() is safer.
attach(what = Cars)
# Plot the whole data frame (all variables against each other).
plot(x = Cars)
# Pairwise correlation matrix of all columns.
cor(x = Cars)
## HP MPG VOL SP WT
## HP 1.00000000 -0.7250383 0.07745947 0.9738481 0.07651307
## MPG -0.72503835 1.0000000 -0.52905658 -0.6871246 -0.52675909
## VOL 0.07745947 -0.5290566 1.00000000 0.1021700 0.99920308
## SP 0.97384807 -0.6871246 0.10217001 1.0000000 0.10243919
## WT 0.07651307 -0.5267591 0.99920308 0.1024392 1.00000000
library(corpcor)
# Partial correlations between the variables (derived from the correlation
# matrix), used to check for a collinearity problem.
cor2pcor(m = cor(x = Cars))
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.00000000 -0.51507804 0.07802551 0.9448373 -0.10251007
## [2,] -0.51507804 1.00000000 -0.06763373 0.2756467 0.02712318
## [3,] 0.07802551 -0.06763373 1.00000000 -0.1056994 0.99838084
## [4,] 0.94483727 0.27564673 -0.10569943 1.0000000 0.12170021
## [5,] -0.10251007 0.02712318 0.99838084 0.1217002 1.00000000
# Full model: regress MPG on all four predictors.
# The data frame is passed explicitly so the fit does not depend on the
# columns having been attach()ed and cannot be silently changed by a global
# variable that shares a column name. This also matches how model.car1 and
# model.car2 are fitted further down.
model.car <- lm(MPG ~ VOL + HP + SP + WT, data = Cars)
summary(model.car)
##
## Call:
## lm(formula = MPG ~ VOL + HP + SP + WT)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6320 -2.9944 -0.3705 2.2149 15.6179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.67734 14.90030 2.059 0.0429 *
## VOL -0.33605 0.56864 -0.591 0.5563
## HP -0.20544 0.03922 -5.239 1.4e-06 ***
## SP 0.39563 0.15826 2.500 0.0146 *
## WT 0.40057 1.69346 0.237 0.8136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.488 on 76 degrees of freedom
## Multiple R-squared: 0.7705, Adjusted R-squared: 0.7585
## F-statistic: 63.8 on 4 and 76 DF, p-value: < 2.2e-16
# Prediction based on only Volume.
# `data = Cars` is given explicitly so the fit does not rely on attach().
model.carV <- lm(MPG ~ VOL, data = Cars)
summary(model.carV) # Volume becomes significant when it is the only predictor
##
## Call:
## lm(formula = MPG ~ VOL)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.3074 -5.2026 0.1902 5.4536 17.1632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.81709 3.95696 14.106 < 2e-16 ***
## VOL -0.21662 0.03909 -5.541 3.82e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.798 on 79 degrees of freedom
## Multiple R-squared: 0.2799, Adjusted R-squared: 0.2708
## F-statistic: 30.71 on 1 and 79 DF, p-value: 3.823e-07
# Prediction based on only Weight.
# `data = Cars` is given explicitly so the fit does not rely on attach().
model.carW <- lm(MPG ~ WT, data = Cars)
summary(model.carW) # Weight becomes significant when it is the only predictor
##
## Call:
## lm(formula = MPG ~ WT)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.3933 -5.4377 0.2738 5.2951 16.9351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.2296 3.8761 14.249 < 2e-16 ***
## WT -0.6420 0.1165 -5.508 4.38e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.811 on 79 degrees of freedom
## Multiple R-squared: 0.2775, Adjusted R-squared: 0.2683
## F-statistic: 30.34 on 1 and 79 DF, p-value: 4.383e-07
# Prediction based on Volume and Weight together.
# `data = Cars` is given explicitly so the fit does not rely on attach().
model.carVW <- lm(MPG ~ VOL + WT, data = Cars)
summary(model.carVW) # Both become insignificant when fitted together
##
## Call:
## lm(formula = MPG ~ VOL + WT)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.9939 -4.9460 0.0028 5.3905 17.6972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.8847 4.5342 12.55 <2e-16 ***
## VOL -0.6983 0.9841 -0.71 0.480
## WT 1.4349 2.9291 0.49 0.626
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.835 on 78 degrees of freedom
## Multiple R-squared: 0.2821, Adjusted R-squared: 0.2637
## F-statistic: 15.33 on 2 and 78 DF, p-value: 2.434e-06
# Deleting a few influential observations is preferable to dropping an entire
# column, which is the costlier option.
# Deletion diagnostics for identifying influential observations.
influence.measures(model = model.car)
## Influence measures of
## lm(formula = MPG ~ VOL + HP + SP + WT) :
##
## dfb.1_ dfb.VOL dfb.HP dfb.SP dfb.WT dffit cov.r
## 1 0.027438 0.34491 -0.064490 -1.06e-02 -0.348732 0.56724 0.774
## 2 0.130255 -0.22541 0.035945 -8.57e-02 0.224097 0.37945 0.913
## 3 0.090410 -0.03970 0.010673 -6.06e-02 0.038239 0.30826 0.896
## 4 -0.050842 -0.12604 -0.086513 7.38e-02 0.124908 0.19016 1.103
## 5 0.086150 0.17799 0.014157 -7.17e-02 -0.179572 0.39154 0.874
## 6 -0.038227 -0.09931 -0.068912 5.82e-02 0.097995 0.15449 1.108
## 7 0.106859 -0.11741 0.021234 -7.10e-02 0.116017 0.32649 0.901
## 8 -0.065348 -0.01492 -0.044642 5.30e-02 0.017852 -0.11238 1.193
## 9 -0.093081 0.05912 -0.062806 7.25e-02 -0.055744 -0.14161 1.213
## 10 -0.031771 -0.00857 -0.041975 3.74e-02 0.008142 0.06104 1.095
## 11 0.010849 -0.05995 -0.018542 5.55e-03 0.058726 0.12152 1.064
## 12 0.185184 -0.24251 0.269479 -2.60e-01 0.264240 -0.67399 0.909
## 13 0.029117 0.01302 0.032757 -3.20e-02 -0.012894 -0.04051 1.157
## 14 0.011711 -0.06396 -0.017970 4.99e-03 0.062745 0.12343 1.066
## 15 0.054751 -0.07598 0.013861 -3.43e-02 0.074656 0.16218 1.046
## 16 0.024610 -0.12416 -0.009612 -3.19e-03 0.122996 0.16212 1.092
## 17 -0.035743 0.04293 -0.043967 3.84e-02 -0.043618 0.07590 1.103
## 18 0.109967 0.11288 0.222667 -2.14e-01 -0.091298 -0.63072 0.889
## 19 -0.021167 -0.01180 -0.032416 2.75e-02 0.011223 0.05655 1.091
## 20 -0.034346 0.03309 -0.035729 3.34e-02 -0.033011 0.05350 1.120
## 21 0.005188 -0.00310 0.005631 -5.32e-03 0.003118 -0.00748 1.111
## 22 -0.009172 0.13117 -0.027690 1.91e-03 -0.128786 0.22782 0.997
## 23 -0.017641 0.02021 -0.017551 1.69e-02 -0.020237 0.02750 1.157
## 24 -0.000285 -0.02650 -0.007742 4.68e-03 0.026417 0.03917 1.104
## 25 0.048301 -0.05565 0.052463 -5.01e-02 0.056583 -0.08596 1.116
## 26 -0.082883 0.01903 0.016880 -2.74e-03 0.000512 -0.55124 0.897
## 27 -0.158459 0.14820 -0.151772 1.46e-01 -0.146183 0.22349 1.146
## 28 0.008111 0.00955 0.010392 -9.73e-03 -0.009524 -0.01799 1.117
## 29 0.323026 -0.18454 0.261335 -3.06e-01 0.187890 0.40784 1.073
## 30 0.013519 0.01685 0.015781 -1.53e-02 -0.017023 -0.02991 1.119
## 31 -0.037571 0.20218 -0.035690 1.77e-02 -0.198784 0.25885 1.053
## 32 0.026770 -0.01523 0.015135 -2.32e-02 0.015672 0.07028 1.072
## 33 0.020540 -0.03504 0.020495 -1.95e-02 0.035187 -0.04458 1.122
## 34 -0.030604 -0.04250 -0.035152 3.35e-02 0.043347 0.07496 1.115
## 35 0.010936 0.09112 0.005937 -1.56e-02 -0.090704 0.11893 1.103
## 36 -0.001573 0.00893 -0.002726 1.58e-03 -0.008936 0.01476 1.094
## 37 -0.009721 -0.00385 -0.002428 5.92e-03 0.004414 -0.04023 1.082
## 38 -0.017382 -0.08410 -0.004671 1.08e-02 0.086403 -0.15334 1.034
## 39 -0.083121 0.20544 -0.045772 5.21e-02 -0.203131 -0.24677 1.054
## 40 0.152467 -0.02970 0.151483 -1.63e-01 0.032106 -0.21212 1.019
## 41 0.002327 -0.01530 -0.000434 -1.92e-03 0.015947 0.03409 1.093
## 42 -0.001566 0.00332 -0.000752 1.27e-03 -0.003399 -0.00694 1.092
## 43 -0.039107 0.11655 -0.016950 2.19e-02 -0.115675 -0.14952 1.064
## 44 -0.027999 -0.11046 -0.021059 2.85e-02 0.111714 -0.14919 1.089
## 45 -0.000546 -0.00544 -0.000433 8.74e-04 0.005411 -0.00711 1.114
## 46 -0.009063 0.01889 -0.002513 5.52e-03 -0.018974 -0.04849 1.076
## 47 -0.002202 0.00112 -0.002566 1.93e-03 -0.000935 0.01048 1.088
## 48 -0.002937 0.00350 -0.001218 2.35e-03 -0.003634 -0.01517 1.084
## 49 0.159432 -0.04698 0.162450 -1.76e-01 0.051421 -0.25338 0.982
## 50 -0.101653 0.10473 -0.090453 9.35e-02 -0.105584 -0.16303 1.059
## 51 -0.081377 0.02529 -0.076501 7.95e-02 -0.026046 -0.12191 1.047
## 52 -0.099716 0.09732 -0.089107 9.21e-02 -0.098168 -0.15790 1.057
## 53 -0.002025 0.01874 0.000521 -9.74e-04 -0.018224 0.02530 1.138
## 54 0.153429 0.09921 0.131520 -1.56e-01 -0.100335 -0.23838 1.149
## 55 0.041217 0.00259 0.031761 -3.87e-02 -0.003279 -0.06524 1.102
## 56 0.014599 -0.02015 0.009917 -1.06e-02 0.019391 -0.03490 1.117
## 57 -0.235589 0.03237 -0.211317 2.27e-01 -0.032098 -0.26485 1.049
## 58 -0.048198 -0.11798 -0.054597 5.77e-02 0.117361 -0.16443 1.074
## 59 -0.186548 -0.06523 -0.171879 1.86e-01 0.065393 -0.25265 1.015
## 60 -0.037707 -0.05542 -0.039495 4.82e-02 0.052562 -0.13667 1.058
## 61 0.024059 0.07541 0.018819 -2.64e-02 -0.076628 -0.11635 1.089
## 62 -0.126961 -0.10712 -0.143815 1.49e-01 0.103065 -0.23796 1.069
## 63 -0.015838 0.02280 -0.024985 2.28e-02 -0.025296 -0.08159 1.111
## 64 -0.047739 0.04873 -0.060306 5.62e-02 -0.052219 -0.13562 1.080
## 65 -0.035161 0.06111 -0.045946 3.98e-02 -0.063646 -0.11608 1.097
## 66 0.190254 0.08129 0.189618 -1.71e-01 -0.089976 0.34353 1.169
## 67 -0.036320 0.12250 -0.049478 3.59e-02 -0.124629 -0.16959 1.099
## 68 0.006786 -0.01561 0.000503 -2.83e-03 0.014952 -0.02907 1.128
## 69 0.006291 -0.00920 -0.005653 9.04e-05 0.007847 -0.05551 1.101
## 70 -0.052619 -0.02289 -0.034244 5.05e-02 0.023153 0.09494 1.216
## 71 0.375081 -0.20353 0.225899 -4.17e-01 0.231060 -1.12358 1.033
## 72 -0.016888 0.04110 -0.005558 8.66e-03 -0.039847 0.05801 1.180
## 73 -0.001556 0.00260 -0.000118 6.18e-04 -0.002414 0.00678 1.127
## 74 0.006863 -0.00131 0.011133 -1.10e-02 0.002486 0.03234 1.215
## 75 -0.206329 0.04805 -0.234704 2.32e-01 -0.055962 -0.32060 1.068
## 76 -0.054797 0.05050 -0.076780 6.57e-02 -0.053946 -0.14193 1.124
## 77 0.214209 -0.29653 0.605131 -1.15e-01 0.240034 2.60978 0.431
## 78 -0.001403 -0.00835 0.020701 -4.72e-03 0.009157 0.08386 1.146
## 79 0.328332 0.14545 0.443574 -3.01e-01 -0.167508 0.97032 1.024
## 80 -0.249491 -0.14528 -0.044284 2.05e-01 0.150314 0.79955 1.013
## 81 -0.316601 -0.03288 -0.384383 3.44e-01 0.030357 -0.43138 1.273
## cook.d hat inf
## 1 6.05e-02 0.0520 *
## 2 2.80e-02 0.0443
## 3 1.85e-02 0.0293
## 4 7.28e-03 0.0643
## 5 2.96e-02 0.0396
## 6 4.81e-03 0.0598
## 7 2.07e-02 0.0330
## 8 2.56e-03 0.1102
## 9 4.06e-03 0.1271 *
## 10 7.54e-04 0.0313
## 11 2.97e-03 0.0288
## 12 8.73e-02 0.0997
## 13 3.32e-04 0.0776
## 14 3.07e-03 0.0298
## 15 5.27e-03 0.0316
## 16 5.29e-03 0.0520
## 17 1.17e-03 0.0402
## 18 7.63e-02 0.0861
## 19 6.47e-04 0.0281
## 20 5.80e-04 0.0496
## 21 1.13e-05 0.0383
## 22 1.03e-02 0.0324
## 23 1.53e-04 0.0771
## 24 3.11e-04 0.0350
## 25 1.49e-03 0.0514
## 26 5.86e-02 0.0731
## 27 1.01e-02 0.0964
## 28 6.56e-05 0.0440
## 29 3.30e-02 0.0991
## 30 1.81e-04 0.0463
## 31 1.34e-02 0.0570
## 32 9.98e-04 0.0197
## 33 4.03e-04 0.0502
## 34 1.14e-03 0.0486
## 35 2.86e-03 0.0488
## 36 4.41e-05 0.0241
## 37 3.28e-04 0.0184
## 38 4.71e-03 0.0259
## 39 1.22e-02 0.0548
## 40 8.97e-03 0.0347
## 41 2.35e-04 0.0255
## 42 9.76e-06 0.0213
## 43 4.49e-03 0.0357
## 44 4.48e-03 0.0478
## 45 1.03e-05 0.0406
## 46 4.76e-04 0.0160
## 47 2.23e-05 0.0183
## 48 4.66e-05 0.0155
## 49 1.27e-02 0.0342
## 50 5.34e-03 0.0368
## 51 2.99e-03 0.0224
## 52 5.01e-03 0.0348
## 53 1.30e-04 0.0618
## 54 1.14e-02 0.1010
## 55 8.61e-04 0.0373
## 56 2.47e-04 0.0448
## 57 1.40e-02 0.0567
## 58 5.44e-03 0.0435
## 59 1.27e-02 0.0426
## 60 3.76e-03 0.0301
## 61 2.73e-03 0.0399
## 62 1.13e-02 0.0587
## 63 1.35e-03 0.0466
## 64 3.71e-03 0.0394
## 65 2.72e-03 0.0445
## 66 2.37e-02 0.1317
## 67 5.79e-03 0.0578
## 68 1.71e-04 0.0533
## 69 6.24e-04 0.0349
## 70 1.83e-03 0.1249 *
## 71 2.42e-01 0.2225 *
## 72 6.82e-04 0.0966
## 73 9.33e-06 0.0524
## 74 2.12e-04 0.1211 *
## 75 2.05e-02 0.0774
## 76 4.07e-03 0.0667
## 77 1.09e+00 0.2514 *
## 78 1.42e-03 0.0730
## 79 1.81e-01 0.1925 *
## 80 1.24e-01 0.1569 *
## 81 3.73e-02 0.2008 *
library(car)
## Warning: package 'car' was built under R version 3.4.4
## Loading required package: carData
## Warning: package 'carData' was built under R version 3.4.4
## Plotting influence measures
# Index plots of the influence measures.
influenceIndexPlot(model.car)
# Influence plot: a more user-friendly view of the influence measures.
# `id.n` is not an argument of influencePlot() in car >= 3.0; it falls through
# to the base graphics calls and triggers "is not a graphical parameter"
# warnings. The number of points to label is requested via `id = list(n = )`.
influencePlot(model.car, id = list(n = 3))
## Warning in plot.window(...): "id.n" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "id.n" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not
## a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not
## a graphical parameter
## Warning in box(...): "id.n" is not a graphical parameter
## Warning in title(...): "id.n" is not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
## graphical parameter
## StudRes Hat CookD
## 1 2.421762 0.05200781 0.06047977
## 71 -2.100131 0.22253511 0.24164401
## 77 4.503603 0.25138750 1.08651940
# Regression after deleting the 77th observation, which is an influential
# observation.
model.car1 <- lm(MPG ~ VOL + SP + HP + WT, data = Cars[-77, ])
# Summarise the refitted model (model.car1). The script previously summarised
# the original model.car here, so the effect of dropping row 77 was never
# shown.
# NOTE(review): the console output pasted below this call came from
# summary(model.car) and should be regenerated.
summary(model.car1)
##
## Call:
## lm(formula = MPG ~ VOL + HP + SP + WT)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6320 -2.9944 -0.3705 2.2149 15.6179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.67734 14.90030 2.059 0.0429 *
## VOL -0.33605 0.56864 -0.591 0.5563
## HP -0.20544 0.03922 -5.239 1.4e-06 ***
## SP 0.39563 0.15826 2.500 0.0146 *
## WT 0.40057 1.69346 0.237 0.8136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.488 on 76 degrees of freedom
## Multiple R-squared: 0.7705, Adjusted R-squared: 0.7585
## F-statistic: 63.8 on 4 and 76 DF, p-value: < 2.2e-16
# Refit the full model with both the 71st and the 77th observations removed.
model.car2 <- lm(
  formula = MPG ~ VOL + SP + HP + WT,
  data = Cars[-c(71, 77), ]
)
summary(object = model.car2)
##
## Call:
## lm(formula = MPG ~ VOL + SP + HP + WT, data = Cars[-c(71, 77),
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9343 -2.3434 -0.5155 1.9756 10.8897
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.26269 13.49494 1.872 0.0652 .
## VOL -0.13878 0.50979 -0.272 0.7862
## SP 0.44336 0.14391 3.081 0.0029 **
## HP -0.22953 0.03537 -6.489 8.68e-09 ***
## WT -0.13051 1.51940 -0.086 0.9318
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.001 on 74 degrees of freedom
## Multiple R-squared: 0.8162, Adjusted R-squared: 0.8063
## F-statistic: 82.15 on 4 and 74 DF, p-value: < 2.2e-16
## Variance inflation factors, to check collinearity between the predictors
vif(mod = model.car)
## VOL HP SP WT
## 638.80608 19.92659 20.00764 639.53382
## A VIF above 10 indicates collinearity; here every predictor exceeds it,
## with VOL and WT far above the rest.
## Added-variable plots, to check the relationship between each predictor and
## the output variable.
avPlots(model = model.car)
## The VIF values and the added-variable plots indicate that the "WT" variable
## should be dropped.
## Final model
# `data = Cars` is given explicitly so the fit does not rely on attach().
finalmodel <- lm(MPG ~ VOL + SP + HP, data = Cars)
summary(finalmodel)
##
## Call:
## lm(formula = MPG ~ VOL + SP + HP)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.5869 -2.8942 -0.3157 2.1291 15.6669
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.92339 14.46589 2.069 0.0419 *
## VOL -0.20165 0.02259 -8.928 1.65e-13 ***
## SP 0.40066 0.15586 2.571 0.0121 *
## HP -0.20670 0.03861 -5.353 8.64e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.46 on 77 degrees of freedom
## Multiple R-squared: 0.7704, Adjusted R-squared: 0.7614
## F-statistic: 86.11 on 3 and 77 DF, p-value: < 2.2e-16
# Evaluate the model's LINE assumptions.
plot(finalmodel)
# Residual plots, QQ plot, std. residuals vs fitted, Cook's distance.
# `id.n` is not an argument of qqPlot() in car >= 3.0, so the request for five
# labelled points was ignored (only the default two were reported). The number
# of points to label is requested via `id = list(n = )`.
qqPlot(model.car, id = list(n = 5))
## [1] 1 77
# QQ plot of studentized residuals helps in identifying outlier
library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.1
library(MASS)
## Warning: package 'MASS' was built under R version 3.4.4
# Stepwise model selection by AIC, starting from the full model; the selected
# model is stored in `x` and printed afterwards.
x <- stepAIC(object = model.car)
## Start: AIC=248.06
## MPG ~ VOL + HP + SP + WT
##
## Df Sum of Sq RSS AIC
## - WT 1 1.13 1531.8 246.12
## - VOL 1 7.03 1537.7 246.43
## <none> 1530.7 248.06
## - SP 1 125.87 1656.5 252.46
## - HP 1 552.74 2083.4 271.03
##
## Step: AIC=246.12
## MPG ~ VOL + HP + SP
##
## Df Sum of Sq RSS AIC
## <none> 1531.8 246.12
## - SP 1 131.46 1663.3 250.79
## - HP 1 570.08 2101.9 269.75
## - VOL 1 1585.81 3117.6 301.68
# Show the model chosen by the stepwise search.
x
##
## Call:
## lm(formula = MPG ~ VOL + HP + SP)
##
## Coefficients:
## (Intercept) VOL HP SP
## 29.9234 -0.2017 -0.2067 0.4007
```