Multiple Linear Regression

Toyota Corolla

Assignment 14

# Load the Toyota Corolla data set from the local CSV file.
Corolla <- read.csv("C:\\Users\\RISHI RAHUL\\Desktop\\DS\\3 MLR\\Assignment\\ToyotaCorolla.csv")

# Keep the response (Price) and the eight predictors used by the models below.
mydata <- Corolla[c(
  "Price", "Age_08_04", "KM", "HP", "cc",
  "Doors", "Gears", "Quarterly_Tax", "Weight"
)]
colnames(mydata)
## [1] "Price"         "Age_08_04"     "KM"            "HP"           
## [5] "cc"            "Doors"         "Gears"         "Quarterly_Tax"
## [9] "Weight"
# attach() is deliberately not called: the analysis passes `mydata` (or
# `data = mydata`) explicitly everywhere, so putting its columns on the search
# path would only risk masking other objects.

# Five-number summary plus mean for every selected column.
summary(mydata)
##      Price         Age_08_04           KM               HP       
##  Min.   : 4350   Min.   : 1.00   Min.   :     1   Min.   : 69.0  
##  1st Qu.: 8450   1st Qu.:44.00   1st Qu.: 43000   1st Qu.: 90.0  
##  Median : 9900   Median :61.00   Median : 63390   Median :110.0  
##  Mean   :10731   Mean   :55.95   Mean   : 68533   Mean   :101.5  
##  3rd Qu.:11950   3rd Qu.:70.00   3rd Qu.: 87021   3rd Qu.:110.0  
##  Max.   :32500   Max.   :80.00   Max.   :243000   Max.   :192.0  
##        cc            Doors           Gears       Quarterly_Tax   
##  Min.   : 1300   Min.   :2.000   Min.   :3.000   Min.   : 19.00  
##  1st Qu.: 1400   1st Qu.:3.000   1st Qu.:5.000   1st Qu.: 69.00  
##  Median : 1600   Median :4.000   Median :5.000   Median : 85.00  
##  Mean   : 1577   Mean   :4.033   Mean   :5.026   Mean   : 87.12  
##  3rd Qu.: 1600   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.: 85.00  
##  Max.   :16000   Max.   :5.000   Max.   :6.000   Max.   :283.00  
##      Weight    
##  Min.   :1000  
##  1st Qu.:1040  
##  Median :1070  
##  Mean   :1072  
##  3rd Qu.:1085  
##  Max.   :1615
# Pairwise correlation matrix of all selected columns (response and predictors).
cor(mydata)
##                     Price    Age_08_04          KM          HP          cc
## Price          1.00000000 -0.876590497 -0.56996016  0.31498983  0.12638920
## Age_08_04     -0.87659050  1.000000000  0.50567218 -0.15662202 -0.09808374
## KM            -0.56996016  0.505672180  1.00000000 -0.33353795  0.10268289
## HP             0.31498983 -0.156622020 -0.33353795  1.00000000  0.03585580
## cc             0.12638920 -0.098083739  0.10268289  0.03585580  1.00000000
## Doors          0.18532555 -0.148359215 -0.03619661  0.09242450  0.07990330
## Gears          0.06310386 -0.005363947  0.01502333  0.20947715  0.01462935
## Quarterly_Tax  0.21919691 -0.198430508  0.27816470 -0.29843172  0.30699580
## Weight         0.58119759 -0.470253184 -0.02859846  0.08961406  0.33563740
##                     Doors        Gears Quarterly_Tax      Weight
## Price          0.18532555  0.063103857   0.219196911  0.58119759
## Age_08_04     -0.14835921 -0.005363947  -0.198430508 -0.47025318
## KM            -0.03619661  0.015023328   0.278164697 -0.02859846
## HP             0.09242450  0.209477146  -0.298431717  0.08961406
## cc             0.07990330  0.014629352   0.306995798  0.33563740
## Doors          1.00000000 -0.160141430   0.109363225  0.30261764
## Gears         -0.16014143  1.000000000  -0.005451955  0.02061328
## Quarterly_Tax  0.10936323 -0.005451955   1.000000000  0.62613373
## Weight         0.30261764  0.020613284   0.626133733  1.00000000
# Scatterplot matrix of every pair of columns in mydata.
plot(mydata)

# Baseline model: Price regressed on all other columns of mydata, all rows.
model <- lm(Price~., data = mydata)
summary(model) # Multiple R-squared: 0.8638; cc and Doors are not significant in this fit
## 
## Call:
## lm(formula = Price ~ ., data = mydata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9366.4  -793.3   -21.3   799.7  6444.0 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -5.573e+03  1.411e+03  -3.949 8.24e-05 ***
## Age_08_04     -1.217e+02  2.616e+00 -46.512  < 2e-16 ***
## KM            -2.082e-02  1.252e-03 -16.622  < 2e-16 ***
## HP             3.168e+01  2.818e+00  11.241  < 2e-16 ***
## cc            -1.211e-01  9.009e-02  -1.344  0.17909    
## Doors         -1.617e+00  4.001e+01  -0.040  0.96777    
## Gears          5.943e+02  1.971e+02   3.016  0.00261 ** 
## Quarterly_Tax  3.949e+00  1.310e+00   3.015  0.00262 ** 
## Weight         1.696e+01  1.068e+00  15.880  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1342 on 1427 degrees of freedom
## Multiple R-squared:  0.8638, Adjusted R-squared:  0.863 
## F-statistic:  1131 on 8 and 1427 DF,  p-value: < 2.2e-16
# car is loaded for the influence, VIF and added-variable diagnostics below.
library(car)
## Warning: package 'car' was built under R version 3.5.1
## Loading required package: carData
# Full per-observation influence table; left disabled.
#influence.measures(model)

# Index plots of influence diagnostics for the baseline model.
influenceIndexPlot(model)

# Influence plot for the baseline model; the observations it flags are
# printed in the output that follows.
influencePlot(model)

##       StudRes       Hat      CookD
## 81   8.164500 0.9182368 79.5201062
## 222 -7.673262 0.1397116  1.0210312
## 961 -5.456195 0.1572484  0.6049996
# Variance inflation factors for the baseline model's predictors
# (multicollinearity check).
vif(model)
##     Age_08_04            KM            HP            cc         Doors 
##      1.884620      1.756905      1.419422      1.163894      1.156575 
##         Gears Quarterly_Tax        Weight 
##      1.098723      2.311431      2.516420
# Refit the full model without rows 81, 222 and 961, the observations listed
# in the influencePlot output for the baseline model.
# NOTE(review): -c(...) drops rows by position; this matches the influencePlot
# labels only while mydata keeps its default 1..n row names - confirm.
model2 <- lm(Price~., data = mydata[-c(81,222,961),])
summary(model2) # Multiple R-squared: 0.8852; every predictor is significant at the 5% level
## 
## Call:
## lm(formula = Price ~ ., data = mydata[-c(81, 222, 961), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8756.8  -761.3   -31.7   720.6  6306.6 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.474e+04  1.433e+03 -10.289  < 2e-16 ***
## Age_08_04     -1.120e+02  2.479e+00 -45.185  < 2e-16 ***
## KM            -1.699e-02  1.200e-03 -14.160  < 2e-16 ***
## HP             3.661e+01  2.745e+00  13.334  < 2e-16 ***
## cc            -3.795e+00  3.021e-01 -12.562  < 2e-16 ***
## Doors         -1.225e+02  3.748e+01  -3.270  0.00110 ** 
## Gears          4.650e+02  1.810e+02   2.569  0.01029 *  
## Quarterly_Tax  5.213e+00  1.371e+00   3.802  0.00015 ***
## Weight         3.064e+01  1.290e+00  23.748  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1231 on 1424 degrees of freedom
## Multiple R-squared:  0.8852, Adjusted R-squared:  0.8845 
## F-statistic:  1372 on 8 and 1424 DF,  p-value: < 2.2e-16
# MASS is loaded for stepAIC().
library(MASS)
# AIC-based stepwise selection starting from the baseline model; the recorded
# trace drops Doors and then cc.
# NOTE(review): this runs on `model` (all rows), not on `model2` (rows 81, 222,
# 961 removed), so the selected formula reflects the data that still contains
# the influential observations - confirm this is intended.
stepAIC(model)
## Start:  AIC=20693.89
## Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + Quarterly_Tax + 
##     Weight
## 
##                 Df  Sum of Sq        RSS   AIC
## - Doors          1       2943 2571786477 20692
## - cc             1    3256511 2575040045 20694
## <none>                        2571783534 20694
## - Quarterly_Tax  1   16377633 2588161166 20701
## - Gears          1   16393629 2588177163 20701
## - HP             1  227730786 2799514319 20814
## - Weight         1  454465243 3026248777 20926
## - KM             1  497917334 3069700867 20946
## - Age_08_04      1 3898860600 6470644134 22017
## 
## Step:  AIC=20691.89
## Price ~ Age_08_04 + KM + HP + cc + Gears + Quarterly_Tax + Weight
## 
##                 Df  Sum of Sq        RSS   AIC
## - cc             1    3254209 2575040686 20692
## <none>                        2571786477 20692
## - Quarterly_Tax  1   16503849 2588290326 20699
## - Gears          1   17093855 2588880332 20699
## - HP             1  228761929 2800548406 20812
## - Weight         1  484447009 3056233485 20938
## - KM             1  498427860 3070214337 20944
## - Age_08_04      1 3898877516 6470663993 22015
## 
## Step:  AIC=20691.7
## Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax + Weight
## 
##                 Df  Sum of Sq        RSS   AIC
## <none>                        2575040686 20692
## - Quarterly_Tax  1   14976762 2590017448 20698
## - Gears          1   17276597 2592317283 20699
## - HP             1  225684613 2800725299 20810
## - Weight         1  484245502 3059286188 20937
## - KM             1  506728527 3081769213 20948
## - Age_08_04      1 3902107988 6477148674 22014
## 
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax + 
##     Weight, data = mydata)
## 
## Coefficients:
##   (Intercept)      Age_08_04             KM             HP          Gears  
##    -5.478e+03     -1.217e+02     -2.094e-02      3.133e+01      5.990e+02  
## Quarterly_Tax         Weight  
##     3.737e+00      1.673e+01
# Added-variable plots for each predictor of the baseline model.
avPlots(model)

# Refit with the predictors retained by stepAIC (cc and Doors dropped), again
# excluding rows 81, 222 and 961.
model3 <- lm(Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax + Weight, data = mydata[-c(81,222,961),])
summary(model3) # Multiple R-squared: 0.8722; Quarterly_Tax is not significant in this fit
## 
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax + 
##     Weight, data = mydata[-c(81, 222, 961), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9324.5  -778.3   -17.5   764.0  6224.4 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.124e+04  1.482e+03  -7.584 6.01e-14 ***
## Age_08_04     -1.161e+02  2.591e+00 -44.812  < 2e-16 ***
## KM            -2.146e-02  1.209e-03 -17.753  < 2e-16 ***
## HP             2.625e+01  2.763e+00   9.503  < 2e-16 ***
## Gears          6.389e+02  1.871e+02   3.414 0.000657 ***
## Quarterly_Tax -7.066e-01  1.338e+00  -0.528 0.597431    
## Weight         2.251e+01  1.149e+00  19.587  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1298 on 1426 degrees of freedom
## Multiple R-squared:  0.8722, Adjusted R-squared:  0.8717 
## F-statistic:  1623 on 6 and 1426 DF,  p-value: < 2.2e-16
# Standard lm diagnostic plots for model2.
plot(model2)

# Histogram of model2's residuals.
hist(residuals(model2))

### Model2 is the final model, with a multiple R-squared of 0.8852 (about 88.52% of the variance in Price explained).