# Load the raw Toyota Corolla data set.
# NOTE: the path is absolute and machine-specific; the file must exist at
# exactly this location for the script to run.
Corolla <- read.csv(
  "C:\\Users\\RISHI RAHUL\\Desktop\\DS\\3 MLR\\Assignment\\ToyotaCorolla.csv"
)

# Keep the response (Price) plus the eight candidate predictors used by the
# regression models in this script.
mydata <- Corolla[
  c(
    "Price", "Age_08_04", "KM", "HP", "cc",
    "Doors", "Gears", "Quarterly_Tax", "Weight"
  )
]

# Print the retained column names as a sanity check.
colnames(mydata)
## [1] "Price" "Age_08_04" "KM" "HP"
## [5] "cc" "Doors" "Gears" "Quarterly_Tax"
## [9] "Weight"
# Per-column summary (min, quartiles, mean, max) to spot implausible values
# before modelling.
# mydata is deliberately not attach()ed: every model call in this script passes
# `data =` explicitly, so no column is looked up on the search path, and an
# attached copy can silently mask or go out of sync with the data frame.
summary(mydata)
## Price Age_08_04 KM HP
## Min. : 4350 Min. : 1.00 Min. : 1 Min. : 69.0
## 1st Qu.: 8450 1st Qu.:44.00 1st Qu.: 43000 1st Qu.: 90.0
## Median : 9900 Median :61.00 Median : 63390 Median :110.0
## Mean :10731 Mean :55.95 Mean : 68533 Mean :101.5
## 3rd Qu.:11950 3rd Qu.:70.00 3rd Qu.: 87021 3rd Qu.:110.0
## Max. :32500 Max. :80.00 Max. :243000 Max. :192.0
## cc Doors Gears Quarterly_Tax
## Min. : 1300 Min. :2.000 Min. :3.000 Min. : 19.00
## 1st Qu.: 1400 1st Qu.:3.000 1st Qu.:5.000 1st Qu.: 69.00
## Median : 1600 Median :4.000 Median :5.000 Median : 85.00
## Mean : 1577 Mean :4.033 Mean :5.026 Mean : 87.12
## 3rd Qu.: 1600 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.: 85.00
## Max. :16000 Max. :5.000 Max. :6.000 Max. :283.00
## Weight
## Min. :1000
## 1st Qu.:1040
## Median :1070
## Mean :1072
## 3rd Qu.:1085
## Max. :1615
# Pairwise correlation matrix of all columns in mydata (cor() defaults to
# Pearson). Used to see which predictors move with Price and with each other.
cor(mydata)
## Price Age_08_04 KM HP cc
## Price 1.00000000 -0.876590497 -0.56996016 0.31498983 0.12638920
## Age_08_04 -0.87659050 1.000000000 0.50567218 -0.15662202 -0.09808374
## KM -0.56996016 0.505672180 1.00000000 -0.33353795 0.10268289
## HP 0.31498983 -0.156622020 -0.33353795 1.00000000 0.03585580
## cc 0.12638920 -0.098083739 0.10268289 0.03585580 1.00000000
## Doors 0.18532555 -0.148359215 -0.03619661 0.09242450 0.07990330
## Gears 0.06310386 -0.005363947 0.01502333 0.20947715 0.01462935
## Quarterly_Tax 0.21919691 -0.198430508 0.27816470 -0.29843172 0.30699580
## Weight 0.58119759 -0.470253184 -0.02859846 0.08961406 0.33563740
## Doors Gears Quarterly_Tax Weight
## Price 0.18532555 0.063103857 0.219196911 0.58119759
## Age_08_04 -0.14835921 -0.005363947 -0.198430508 -0.47025318
## KM -0.03619661 0.015023328 0.278164697 -0.02859846
## HP 0.09242450 0.209477146 -0.298431717 0.08961406
## cc 0.07990330 0.014629352 0.306995798 0.33563740
## Doors 1.00000000 -0.160141430 0.109363225 0.30261764
## Gears -0.16014143 1.000000000 -0.005451955 0.02061328
## Quarterly_Tax 0.10936323 -0.005451955 1.000000000 0.62613373
## Weight 0.30261764 0.020613284 0.626133733 1.00000000
# Scatterplot matrix of every pair of columns in mydata.
plot(mydata)

# Baseline model: regress Price on all remaining columns, using every row.
model <- lm(formula = Price ~ ., data = mydata)
summary(model) # R squared: 0.8638
##
## Call:
## lm(formula = Price ~ ., data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9366.4 -793.3 -21.3 799.7 6444.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.573e+03 1.411e+03 -3.949 8.24e-05 ***
## Age_08_04 -1.217e+02 2.616e+00 -46.512 < 2e-16 ***
## KM -2.082e-02 1.252e-03 -16.622 < 2e-16 ***
## HP 3.168e+01 2.818e+00 11.241 < 2e-16 ***
## cc -1.211e-01 9.009e-02 -1.344 0.17909
## Doors -1.617e+00 4.001e+01 -0.040 0.96777
## Gears 5.943e+02 1.971e+02 3.016 0.00261 **
## Quarterly_Tax 3.949e+00 1.310e+00 3.015 0.00262 **
## Weight 1.696e+01 1.068e+00 15.880 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1342 on 1427 degrees of freedom
## Multiple R-squared: 0.8638, Adjusted R-squared: 0.863
## F-statistic: 1131 on 8 and 1427 DF, p-value: < 2.2e-16
library(car)
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
# Influence diagnostics for the baseline model.
# Per-observation table of influence measures, left disabled:
#influence.measures(model)
# Index plots of influence diagnostics, one point per observation.
influenceIndexPlot(model)
# Influence plot; it also prints the flagged observations with their
# studentized residual, hat value and Cook's distance (rows 81, 222 and 961
# in the output below).
influencePlot(model)
## StudRes Hat CookD
## 81 8.164500 0.9182368 79.5201062
## 222 -7.673262 0.1397116 1.0210312
## 961 -5.456195 0.1572484 0.6049996
# Variance inflation factors for the baseline model's predictors, as a check
# for multicollinearity.
vif(model)
## Age_08_04 KM HP cc Doors
## 1.884620 1.756905 1.419422 1.163894 1.156575
## Gears Quarterly_Tax Weight
## 1.098723 2.311431 2.516420
# Refit the full model after dropping rows 81, 222 and 961, the observations
# flagged by influencePlot() on the baseline model.
model2 <- lm(
  formula = Price ~ .,
  data = mydata[-c(81, 222, 961), ]
)
summary(model2) # R squared: 0.8852
##
## Call:
## lm(formula = Price ~ ., data = mydata[-c(81, 222, 961), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -8756.8 -761.3 -31.7 720.6 6306.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.474e+04 1.433e+03 -10.289 < 2e-16 ***
## Age_08_04 -1.120e+02 2.479e+00 -45.185 < 2e-16 ***
## KM -1.699e-02 1.200e-03 -14.160 < 2e-16 ***
## HP 3.661e+01 2.745e+00 13.334 < 2e-16 ***
## cc -3.795e+00 3.021e-01 -12.562 < 2e-16 ***
## Doors -1.225e+02 3.748e+01 -3.270 0.00110 **
## Gears 4.650e+02 1.810e+02 2.569 0.01029 *
## Quarterly_Tax 5.213e+00 1.371e+00 3.802 0.00015 ***
## Weight 3.064e+01 1.290e+00 23.748 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1231 on 1424 degrees of freedom
## Multiple R-squared: 0.8852, Adjusted R-squared: 0.8845
## F-statistic: 1372 on 8 and 1424 DF, p-value: < 2.2e-16
library(MASS)
# Stepwise model selection by AIC, starting from the baseline model. The trace
# and the selected model are printed; the result is not stored.
# NOTE(review): selection runs on `model` (all rows), whereas the selected
# formula is later refit as model3 without rows 81, 222 and 961 - confirm this
# mismatch is intended.
stepAIC(model)
## Start: AIC=20693.89
## Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + Quarterly_Tax +
## Weight
##
## Df Sum of Sq RSS AIC
## - Doors 1 2943 2571786477 20692
## - cc 1 3256511 2575040045 20694
## <none> 2571783534 20694
## - Quarterly_Tax 1 16377633 2588161166 20701
## - Gears 1 16393629 2588177163 20701
## - HP 1 227730786 2799514319 20814
## - Weight 1 454465243 3026248777 20926
## - KM 1 497917334 3069700867 20946
## - Age_08_04 1 3898860600 6470644134 22017
##
## Step: AIC=20691.89
## Price ~ Age_08_04 + KM + HP + cc + Gears + Quarterly_Tax + Weight
##
## Df Sum of Sq RSS AIC
## - cc 1 3254209 2575040686 20692
## <none> 2571786477 20692
## - Quarterly_Tax 1 16503849 2588290326 20699
## - Gears 1 17093855 2588880332 20699
## - HP 1 228761929 2800548406 20812
## - Weight 1 484447009 3056233485 20938
## - KM 1 498427860 3070214337 20944
## - Age_08_04 1 3898877516 6470663993 22015
##
## Step: AIC=20691.7
## Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax + Weight
##
## Df Sum of Sq RSS AIC
## <none> 2575040686 20692
## - Quarterly_Tax 1 14976762 2590017448 20698
## - Gears 1 17276597 2592317283 20699
## - HP 1 225684613 2800725299 20810
## - Weight 1 484245502 3059286188 20937
## - KM 1 506728527 3081769213 20948
## - Age_08_04 1 3902107988 6477148674 22014
##
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax +
## Weight, data = mydata)
##
## Coefficients:
## (Intercept) Age_08_04 KM HP Gears
## -5.478e+03 -1.217e+02 -2.094e-02 3.133e+01 5.990e+02
## Quarterly_Tax Weight
## 3.737e+00 1.673e+01
# Added-variable (partial regression) plots for the baseline model.
avPlots(model)

# Refit using only the predictors retained by stepAIC(), again leaving out the
# influential rows 81, 222 and 961.
model3 <- lm(
  formula = Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax + Weight,
  data = mydata[-c(81, 222, 961), ]
)
summary(model3) # R-Squared value : 0.8722
##
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax +
## Weight, data = mydata[-c(81, 222, 961), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -9324.5 -778.3 -17.5 764.0 6224.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.124e+04 1.482e+03 -7.584 6.01e-14 ***
## Age_08_04 -1.161e+02 2.591e+00 -44.812 < 2e-16 ***
## KM -2.146e-02 1.209e-03 -17.753 < 2e-16 ***
## HP 2.625e+01 2.763e+00 9.503 < 2e-16 ***
## Gears 6.389e+02 1.871e+02 3.414 0.000657 ***
## Quarterly_Tax -7.066e-01 1.338e+00 -0.528 0.597431
## Weight 2.251e+01 1.149e+00 19.587 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1298 on 1426 degrees of freedom
## Multiple R-squared: 0.8722, Adjusted R-squared: 0.8717
## F-statistic: 1623 on 6 and 1426 DF, p-value: < 2.2e-16
# Standard lm diagnostic plots for model2.
plot(model2)
# Histogram of model2's residuals, to eyeball the shape of their distribution.
hist(residuals(model2))
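### Model2 is the final model, with a multiple R-squared of 0.8852: it explains about 88.5% of the variance in Price on the fitted data (a goodness-of-fit measure, not predictive accuracy).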