## Load the Cars data and explore it before fitting any model.
Cars <- read.csv(file.choose()) ## choose the Cars.csv data set
head(Cars)
## HP MPG VOL SP WT
## 1 49 53.70068 89 104.1854 28.76206
## 2 55 50.01340 92 105.4613 30.46683
## 3 55 50.01340 92 105.4613 30.19360
## 4 70 45.69632 92 113.4613 30.63211
## 5 53 50.50423 92 104.4613 29.88915
## 6 70 45.69632 89 113.1854 29.59177
View(Cars)
## Columns are reached through `Cars` explicitly (Cars$HP, with(), data =)
## rather than attach(Cars), so no column names are put on the search path.
### > Histogram, Box plot, Dot plot, Stem & Leaf plot, Bar Plot
summary(Cars)
str(Cars$HP)
dev.new() ## portable graphics window; windows() exists only on Windows
plot(Cars) ## on a multi-column data frame this draws a scatterplot matrix
pairs(Cars)
cor(Cars)
## HP MPG VOL SP WT
## HP 1.00000000 -0.7250383 0.07745947 0.9738481 0.07651307
## MPG -0.72503835 1.0000000 -0.52905658 -0.6871246 -0.52675909
## VOL 0.07745947 -0.5290566 1.00000000 0.1021700 0.99920308
## SP 0.97384807 -0.6871246 0.10217001 1.0000000 0.10243919
## WT 0.07651307 -0.5267591 0.99920308 0.1024392 1.00000000
plot(MPG ~ HP, data = Cars) ## HP on the x-axis, MPG on the y-axis
with(Cars, cor(HP, MPG))
## [1] -0.7250383
with(Cars, cor(MPG, HP)) ## correlation is symmetric in its arguments
## [1] -0.7250383
cor(Cars)
## HP MPG VOL SP WT
## HP 1.00000000 -0.7250383 0.07745947 0.9738481 0.07651307
## MPG -0.72503835 1.0000000 -0.52905658 -0.6871246 -0.52675909
## VOL 0.07745947 -0.5290566 1.00000000 0.1021700 0.99920308
## SP 0.97384807 -0.6871246 0.10217001 1.0000000 0.10243919
## WT 0.07651307 -0.5267591 0.99920308 0.1024392 1.00000000
## Full model: regress MPG on all four predictors (VOL, HP, SP, WT).
m1 <- lm(formula = MPG ~ VOL + HP + SP + WT, data = Cars)
summary(m1)
##
## Call:
## lm(formula = MPG ~ VOL + HP + SP + WT, data = Cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6320 -2.9944 -0.3705 2.2149 15.6179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.67734 14.90030 2.059 0.0429 *
## VOL -0.33605 0.56864 -0.591 0.5563
## HP -0.20544 0.03922 -5.239 1.4e-06 ***
## SP 0.39563 0.15826 2.500 0.0146 *
## WT 0.40057 1.69346 0.237 0.8136
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.488 on 76 degrees of freedom
## Multiple R-squared: 0.7705, Adjusted R-squared: 0.7585
## F-statistic: 63.8 on 4 and 76 DF, p-value: < 2.2e-16
## MPG on VOL alone: entered by itself, volume becomes significant.
mv <- lm(formula = MPG ~ VOL, data = Cars)
summary(mv)
##
## Call:
## lm(formula = MPG ~ VOL, data = Cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.3074 -5.2026 0.1902 5.4536 17.1632
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.81709 3.95696 14.106 < 2e-16 ***
## VOL -0.21662 0.03909 -5.541 3.82e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.798 on 79 degrees of freedom
## Multiple R-squared: 0.2799, Adjusted R-squared: 0.2708
## F-statistic: 30.71 on 1 and 79 DF, p-value: 3.823e-07
## MPG on WT alone: entered by itself, weight becomes significant.
mw <- lm(formula = MPG ~ WT, data = Cars)
summary(mw)
##
## Call:
## lm(formula = MPG ~ WT, data = Cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.3933 -5.4377 0.2738 5.2951 16.9351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 55.2296 3.8761 14.249 < 2e-16 ***
## WT -0.6420 0.1165 -5.508 4.38e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.811 on 79 degrees of freedom
## Multiple R-squared: 0.2775, Adjusted R-squared: 0.2683
## F-statistic: 30.34 on 1 and 79 DF, p-value: 4.383e-07
## MPG on VOL and WT together: both become insignificant
## (cor(Cars) reports a VOL-WT correlation of 0.999).
mvw <- lm(formula = MPG ~ VOL + WT, data = Cars)
summary(mvw)
##
## Call:
## lm(formula = MPG ~ VOL + WT, data = Cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.9939 -4.9460 0.0028 5.3905 17.6972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 56.8847 4.5342 12.55 <2e-16 ***
## VOL -0.6983 0.9841 -0.71 0.480
## WT 1.4349 2.9291 0.49 0.626
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.835 on 78 degrees of freedom
## Multiple R-squared: 0.2821, Adjusted R-squared: 0.2637
## F-statistic: 15.33 on 2 and 78 DF, p-value: 2.434e-06
library(car)
## Warning: package 'car' was built under R version 3.6.1
## Loading required package: carData
dev.new() ## portable graphics window; windows() exists only on Windows
## Influence diagnostics for the full model: the call draws the plot and
## prints StudRes, Hat and CookD for the observations it singles out.
influencePlot(m1)
## StudRes Hat CookD
## 1 2.421762 0.05200781 0.06047977
## 71 -2.100131 0.22253511 0.24164401
## 77 4.503603 0.25138750 1.08651940
## Regression after deleting the 77th observation, which is an influential
## observation (largest CookD in the influencePlot output).
m2 <- lm(formula = MPG ~ VOL + SP + HP + WT, data = Cars[-77, ])
summary(m2)
##
## Call:
## lm(formula = MPG ~ VOL + SP + HP + WT, data = Cars[-77, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3943 -2.3555 -0.5913 1.8978 12.0184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.82675 13.32251 2.089 0.04013 *
## VOL -0.18546 0.50895 -0.364 0.71659
## SP 0.41189 0.14139 2.913 0.00471 **
## HP -0.22664 0.03534 -6.413 1.14e-08 ***
## WT 0.03754 1.51458 0.025 0.98029
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.008 on 75 degrees of freedom
## Multiple R-squared: 0.8192, Adjusted R-squared: 0.8096
## F-statistic: 84.96 on 4 and 75 DF, p-value: < 2.2e-16
## Same model with rows 71, 1 and 77 removed, i.e. all three observations
## listed in the influencePlot output.
m3 <- lm(formula = MPG ~ VOL + SP + HP + WT, data = Cars[-c(71, 1, 77), ])
summary(m3)
##
## Call:
## lm(formula = MPG ~ VOL + SP + HP + WT, data = Cars[-c(71, 1,
## 77), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7300 -2.5391 -0.3696 2.1482 10.7151
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.82062 13.01740 1.907 0.06049 .
## VOL -0.31823 0.49668 -0.641 0.52372
## SP 0.44618 0.13881 3.214 0.00195 **
## HP -0.22688 0.03413 -6.647 4.67e-09 ***
## WT 0.40617 1.48045 0.274 0.78459
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.859 on 73 degrees of freedom
## Multiple R-squared: 0.821, Adjusted R-squared: 0.8112
## F-statistic: 83.72 on 4 and 73 DF, p-value: < 2.2e-16
## Variance inflation factors of the full model.
vif(m1)
## Final model: the same regression refitted without WT, on all rows of Cars.
finalmodel <- lm(formula = MPG ~ VOL + SP + HP, data = Cars)
summary(finalmodel)
##
## Call:
## lm(formula = MPG ~ VOL + SP + HP, data = Cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.5869 -2.8942 -0.3157 2.1291 15.6669
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 29.92339 14.46589 2.069 0.0419 *
## VOL -0.20165 0.02259 -8.928 1.65e-13 ***
## SP 0.40066 0.15586 2.571 0.0121 *
## HP -0.20670 0.03861 -5.353 8.64e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.46 on 77 degrees of freedom
## Multiple R-squared: 0.7704, Adjusted R-squared: 0.7614
## F-statistic: 86.11 on 3 and 77 DF, p-value: < 2.2e-16
dev.new() ## portable graphics window; windows() exists only on Windows
## Added-variable plots for the full model and then for the final model.
avPlots(m1)
avPlots(finalmodel)
### VIF and the AV plots have given us an indication to delete the "WT" variable
## Read the new cases and predict their MPG with the final model.
test <- read.csv(file = file.choose()) ##test.csv
predict(object = finalmodel, newdata = test)
## 1 2 3
## 42.36150 42.26954 42.85590
## Summary statistics of the data the models were fitted on.
summary(Cars)
## HP MPG VOL SP
## Min. : 49.0 Min. :12.10 Min. : 50.00 Min. : 99.56
## 1st Qu.: 84.0 1st Qu.:27.86 1st Qu.: 89.00 1st Qu.:113.83
## Median :100.0 Median :35.15 Median :101.00 Median :118.21
## Mean :117.5 Mean :34.42 Mean : 98.77 Mean :121.54
## 3rd Qu.:140.0 3rd Qu.:39.53 3rd Qu.:113.00 3rd Qu.:126.40
## Max. :322.0 Max. :53.70 Max. :160.00 Max. :169.60
## WT
## Min. :15.71
## 1st Qu.:29.59
## Median :32.73
## Mean :32.41
## 3rd Qu.:37.39
## Max. :53.00
dev.new() ## portable graphics window; windows() exists only on Windows
plot(test)
## transformation example on Final model: the same predictors as finalmodel,
## each entered on the log scale
finalmodel1 <- lm(MPG ~ log(VOL) + log(SP) + log(HP), data = Cars)
summary(finalmodel1)
##
## Call:
## lm(formula = MPG ~ log(VOL) + log(SP) + log(HP), data = Cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.5400 -0.7858 0.1328 0.9994 10.7265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -115.717 37.725 -3.067 0.00298 **
## log(VOL) -11.270 1.192 -9.454 1.60e-14 ***
## log(SP) 78.244 10.064 7.774 2.76e-11 ***
## log(HP) -37.144 2.625 -14.147 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.775 on 77 degrees of freedom
## Multiple R-squared: 0.9111, Adjusted R-squared: 0.9077
## F-statistic: 263.1 on 3 and 77 DF, p-value: < 2.2e-16
## Predictions for the test cases from the log-transformed model.
## For comparison, the untransformed finalmodel gives
## 42.36150 42.26954 42.85590 for cases 1 2 3.
predict(finalmodel1, newdata = test)
## (no output pasted here: re-run to obtain the finalmodel1 predictions)
dev.new() ## Evaluate model LINE assumptions (portable replacement for windows())
## Added-variable plots for the log-transformed model; finalmodel itself
## is already plotted earlier in the script.
avPlots(finalmodel1)
## Akaike Information Criterion (AIC): stepwise model selection starting from
## the full model m1. stepAIC() adds/drops model terms (variables), not
## observations. It lives in the MASS package, which this script never
## attaches, so it is called with an explicit namespace
## (run install.packages("MASS") first if the package is missing).
MASS::stepAIC(m1)