# Load the Toyota Corolla data set.
# NOTE: the path is machine-specific; adjust it when running elsewhere.
MLRQ1 <- read.csv("D:\\DataScience\\Assignments\\MultiLinearRegression\\ToyotaCorolla.csv")

# MLRQ1 is intentionally not attach()ed: its columns are only ever reached
# through explicit indexing, and attaching it alongside dataQ1 would merely
# put two masked copies of the same column names on the search path.

# Keep the response (Price) and the eight candidate predictors.
dataQ1 <- MLRQ1[c(
  "Price", "Age_08_04", "KM", "HP", "cc",
  "Doors", "Gears", "Quarterly_Tax", "Weight"
)]

# View() opens a spreadsheet-style viewer, so only call it when a user is
# actually present (it is pointless or fails under Rscript / knitting).
if (interactive()) {
  View(dataQ1)
}

# Five-number summary plus mean for every selected column.
summary(dataQ1)
## Price Age_08_04 KM HP
## Min. : 4350 Min. : 1.00 Min. : 1 Min. : 69.0
## 1st Qu.: 8450 1st Qu.:44.00 1st Qu.: 43000 1st Qu.: 90.0
## Median : 9900 Median :61.00 Median : 63390 Median :110.0
## Mean :10731 Mean :55.95 Mean : 68533 Mean :101.5
## 3rd Qu.:11950 3rd Qu.:70.00 3rd Qu.: 87021 3rd Qu.:110.0
## Max. :32500 Max. :80.00 Max. :243000 Max. :192.0
## cc Doors Gears Quarterly_Tax
## Min. : 1300 Min. :2.000 Min. :3.000 Min. : 19.00
## 1st Qu.: 1400 1st Qu.:3.000 1st Qu.:5.000 1st Qu.: 69.00
## Median : 1600 Median :4.000 Median :5.000 Median : 85.00
## Mean : 1577 Mean :4.033 Mean :5.026 Mean : 87.12
## 3rd Qu.: 1600 3rd Qu.:5.000 3rd Qu.:5.000 3rd Qu.: 85.00
## Max. :16000 Max. :5.000 Max. :6.000 Max. :283.00
## Weight
## Min. :1000
## 1st Qu.:1040
## Median :1070
## Mean :1072
## 3rd Qu.:1085
## Max. :1615
# Put dataQ1's columns on the search path. Any later call that refers to
# these column names without a `data` argument resolves them through this
# attached copy.
attach(what = dataQ1)
## The following objects are masked from MLRQ1:
##
## Age_08_04, cc, Doors, Gears, HP, KM, Price, Quarterly_Tax,
## Weight

# Pairwise scatterplots of all selected columns.
plot(x = dataQ1)

# Pairwise correlation matrix of all selected columns (printed below).
cor(x = dataQ1)
## Price Age_08_04 KM HP cc
## Price 1.00000000 -0.876590497 -0.56996016 0.31498983 0.12638920
## Age_08_04 -0.87659050 1.000000000 0.50567218 -0.15662202 -0.09808374
## KM -0.56996016 0.505672180 1.00000000 -0.33353795 0.10268289
## HP 0.31498983 -0.156622020 -0.33353795 1.00000000 0.03585580
## cc 0.12638920 -0.098083739 0.10268289 0.03585580 1.00000000
## Doors 0.18532555 -0.148359215 -0.03619661 0.09242450 0.07990330
## Gears 0.06310386 -0.005363947 0.01502333 0.20947715 0.01462935
## Quarterly_Tax 0.21919691 -0.198430508 0.27816470 -0.29843172 0.30699580
## Weight 0.58119759 -0.470253184 -0.02859846 0.08961406 0.33563740
## Doors Gears Quarterly_Tax Weight
## Price 0.18532555 0.063103857 0.219196911 0.58119759
## Age_08_04 -0.14835921 -0.005363947 -0.198430508 -0.47025318
## KM -0.03619661 0.015023328 0.278164697 -0.02859846
## HP 0.09242450 0.209477146 -0.298431717 0.08961406
## cc 0.07990330 0.014629352 0.306995798 0.33563740
## Doors 1.00000000 -0.160141430 0.109363225 0.30261764
## Gears -0.16014143 1.000000000 -0.005451955 0.02061328
## Quarterly_Tax 0.10936323 -0.005451955 1.000000000 0.62613373
## Weight 0.30261764 0.020613284 0.626133733 1.00000000
#cor2pcor(cor(dataQ1))
# Baseline model: regress Price on all eight candidate predictors.
# `data = dataQ1` is passed explicitly so the variables are taken from the
# data frame itself rather than from whatever happens to be attach()ed on
# the search path (and in whatever masking order).
modelQ1 <- lm(
  Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + Quarterly_Tax + Weight,
  data = dataQ1
)
summary(modelQ1)
##
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears +
## Quarterly_Tax + Weight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9366.4 -793.3 -21.3 799.7 6444.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.573e+03 1.411e+03 -3.949 8.24e-05 ***
## Age_08_04 -1.217e+02 2.616e+00 -46.512 < 2e-16 ***
## KM -2.082e-02 1.252e-03 -16.622 < 2e-16 ***
## HP 3.168e+01 2.818e+00 11.241 < 2e-16 ***
## cc -1.211e-01 9.009e-02 -1.344 0.17909
## Doors -1.617e+00 4.001e+01 -0.040 0.96777
## Gears 5.943e+02 1.971e+02 3.016 0.00261 **
## Quarterly_Tax 3.949e+00 1.310e+00 3.015 0.00262 **
## Weight 1.696e+01 1.068e+00 15.880 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1342 on 1427 degrees of freedom
## Multiple R-squared: 0.8638, Adjusted R-squared: 0.863
## F-statistic: 1131 on 8 and 1427 DF, p-value: < 2.2e-16
# Reduced model using only cc and Doors, the two predictors that were not
# significant in the full fit, to see how they behave on their own.
# `data = dataQ1` is passed explicitly so the fit does not depend on
# attach()ed data frames on the search path.
modelQ1a <- lm(Price ~ cc + Doors, data = dataQ1)
summary(modelQ1a)
##
## Call:
## lm(formula = Price ~ cc + Doors)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7243.9 -2273.6 -821.3 1054.4 20714.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6509.4211 515.7732 12.621 < 2e-16 ***
## cc 0.9597 0.2211 4.340 1.52e-05 ***
## Doors 671.3973 98.5009 6.816 1.37e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3543 on 1433 degrees of freedom
## Multiple R-squared: 0.04688, Adjusted R-squared: 0.04555
## F-statistic: 35.24 on 2 and 1433 DF, p-value: 1.15e-15
#install.packages("car")
#library(car)
#install.packages("carData")
# Index plots for influence measures
#influence.measures(modelQ1)
#influenceIndexPlot(modelQ1)
#influencePlot(modelQ1,id.n=3)
# Refit the full model with row 81 of MLRQ1 dropped.
# NOTE(review): presumably row 81 is the point flagged by the commented-out
# influence diagnostics above -- confirm.
modelQ1b <- lm(
  formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears +
    Quarterly_Tax + Weight,
  data = MLRQ1[-81, ]
)
summary(modelQ1b)
##
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears +
## Quarterly_Tax + Weight, data = MLRQ1[-81, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -11455.7 -761.7 -32.7 739.3 6739.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.285e+03 1.383e+03 -4.545 5.95e-06 ***
## Age_08_04 -1.205e+02 2.562e+00 -47.021 < 2e-16 ***
## KM -1.785e-02 1.277e-03 -13.973 < 2e-16 ***
## HP 3.935e+01 2.911e+00 13.516 < 2e-16 ***
## cc -2.524e+00 3.072e-01 -8.216 4.67e-16 ***
## Doors -2.723e+01 3.924e+01 -0.694 0.48788
## Gears 5.239e+02 1.929e+02 2.717 0.00667 **
## Quarterly_Tax 9.044e+00 1.425e+00 6.348 2.93e-10 ***
## Weight 2.017e+01 1.116e+00 18.076 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1313 on 1426 degrees of freedom
## Multiple R-squared: 0.8694, Adjusted R-squared: 0.8686
## F-statistic: 1186 on 8 and 1426 DF, p-value: < 2.2e-16
# Refit the full model with rows 81, 222 and 961 of MLRQ1 dropped.
# NOTE(review): these look like the observations singled out by the
# influence plots -- confirm.
modelQ1c <- lm(
  formula = Price ~
    Age_08_04 +
    KM +
    HP +
    cc +
    Doors +
    Gears +
    Quarterly_Tax +
    Weight,
  data = MLRQ1[-c(81, 222, 961), ]
)
summary(modelQ1c)
##
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears +
## Quarterly_Tax + Weight, data = MLRQ1[-c(81, 222, 961), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -8756.8 -761.3 -31.7 720.6 6306.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.474e+04 1.433e+03 -10.289 < 2e-16 ***
## Age_08_04 -1.120e+02 2.479e+00 -45.185 < 2e-16 ***
## KM -1.699e-02 1.200e-03 -14.160 < 2e-16 ***
## HP 3.661e+01 2.745e+00 13.334 < 2e-16 ***
## cc -3.795e+00 3.021e-01 -12.562 < 2e-16 ***
## Doors -1.225e+02 3.748e+01 -3.270 0.00110 **
## Gears 4.650e+02 1.810e+02 2.569 0.01029 *
## Quarterly_Tax 5.213e+00 1.371e+00 3.802 0.00015 ***
## Weight 3.064e+01 1.290e+00 23.748 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1231 on 1424 degrees of freedom
## Multiple R-squared: 0.8852, Adjusted R-squared: 0.8845
## F-statistic: 1372 on 8 and 1424 DF, p-value: < 2.2e-16
#vif(modelQ1)
# Variance inflation factor (VIF) to check collinearity between the predictors
# A VIF > 10 indicates that the predictor is strongly collinear with the others
#avPlots(modelQ1)
## Added-variable plots to check the relationship between each predictor and the output variable
# Final model: all eight predictors, fitted on MLRQ1 without rows 81, 222
# and 961 (same specification and row exclusions as modelQ1c).
FinalModelQ1 <- lm(
  formula = Price ~ Age_08_04 + KM + HP + cc +
    Doors + Gears + Quarterly_Tax + Weight,
  data = MLRQ1[-c(81, 222, 961), ]
)
summary(object = FinalModelQ1)
##
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears +
## Quarterly_Tax + Weight, data = MLRQ1[-c(81, 222, 961), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -8756.8 -761.3 -31.7 720.6 6306.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.474e+04 1.433e+03 -10.289 < 2e-16 ***
## Age_08_04 -1.120e+02 2.479e+00 -45.185 < 2e-16 ***
## KM -1.699e-02 1.200e-03 -14.160 < 2e-16 ***
## HP 3.661e+01 2.745e+00 13.334 < 2e-16 ***
## cc -3.795e+00 3.021e-01 -12.562 < 2e-16 ***
## Doors -1.225e+02 3.748e+01 -3.270 0.00110 **
## Gears 4.650e+02 1.810e+02 2.569 0.01029 *
## Quarterly_Tax 5.213e+00 1.371e+00 3.802 0.00015 ***
## Weight 3.064e+01 1.290e+00 23.748 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1231 on 1424 degrees of freedom
## Multiple R-squared: 0.8852, Adjusted R-squared: 0.8845
## F-statistic: 1372 on 8 and 1424 DF, p-value: < 2.2e-16
# Evaluate the model's LINE assumptions
# (Linearity, Independence, Normality, Equal variance)
# using the standard diagnostic plots for a fitted lm object.
plot(x = FinalModelQ1)




#Residual plots,QQplot,std-Residuals Vs Fitted,Cook's Distance
#qqPlot(modelQ1,id.n=5)
# A QQ plot of studentized residuals helps in identifying outliers
#install.packages("caret")
#install.packages("lattice")
#install.packages("ggplot2")
#library(MASS)
#x<- stepAIC(modelQ1)