# Load the Toyota Corolla data set for the multiple linear regression exercise.
# NOTE(review): the path is absolute and machine-specific; adjust it when the
# script is run on another machine.
MLRQ1 <- read.csv("D:\\DataScience\\Assignments\\MultiLinearRegression\\ToyotaCorolla.csv")

# NOTE(review): attach() places every column of MLRQ1 on the search path. It is
# kept so that code further down that refers to columns by bare name keeps
# working; prefer passing `data =` to modelling functions instead.
attach(MLRQ1)

# Keep only the response (Price) and the eight candidate predictors.
dataQ1 <- MLRQ1[c(
  "Price", "Age_08_04", "KM", "HP", "cc",
  "Doors", "Gears", "Quarterly_Tax", "Weight"
)]

# View() opens a GUI data viewer, so only call it from an interactive session;
# this lets the script also run under Rscript or when knitted.
if (interactive()) {
  View(dataQ1)
}

# Five-number summary and mean of every modelling column.
summary(dataQ1)
##      Price         Age_08_04           KM               HP       
##  Min.   : 4350   Min.   : 1.00   Min.   :     1   Min.   : 69.0  
##  1st Qu.: 8450   1st Qu.:44.00   1st Qu.: 43000   1st Qu.: 90.0  
##  Median : 9900   Median :61.00   Median : 63390   Median :110.0  
##  Mean   :10731   Mean   :55.95   Mean   : 68533   Mean   :101.5  
##  3rd Qu.:11950   3rd Qu.:70.00   3rd Qu.: 87021   3rd Qu.:110.0  
##  Max.   :32500   Max.   :80.00   Max.   :243000   Max.   :192.0  
##        cc            Doors           Gears       Quarterly_Tax   
##  Min.   : 1300   Min.   :2.000   Min.   :3.000   Min.   : 19.00  
##  1st Qu.: 1400   1st Qu.:3.000   1st Qu.:5.000   1st Qu.: 69.00  
##  Median : 1600   Median :4.000   Median :5.000   Median : 85.00  
##  Mean   : 1577   Mean   :4.033   Mean   :5.026   Mean   : 87.12  
##  3rd Qu.: 1600   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.: 85.00  
##  Max.   :16000   Max.   :5.000   Max.   :6.000   Max.   :283.00  
##      Weight    
##  Min.   :1000  
##  1st Qu.:1040  
##  Median :1070  
##  Mean   :1072  
##  3rd Qu.:1085  
##  Max.   :1615
# dataQ1 is intentionally not attach()ed: its columns are a subset of MLRQ1,
# which is already attached, so a second attach() would only mask identical
# objects on the search path.

# Scatterplot matrix of the response and all predictors.
plot(dataQ1)

# Pairwise correlation matrix of the same columns.
cor(dataQ1)
##                     Price    Age_08_04          KM          HP          cc
## Price          1.00000000 -0.876590497 -0.56996016  0.31498983  0.12638920
## Age_08_04     -0.87659050  1.000000000  0.50567218 -0.15662202 -0.09808374
## KM            -0.56996016  0.505672180  1.00000000 -0.33353795  0.10268289
## HP             0.31498983 -0.156622020 -0.33353795  1.00000000  0.03585580
## cc             0.12638920 -0.098083739  0.10268289  0.03585580  1.00000000
## Doors          0.18532555 -0.148359215 -0.03619661  0.09242450  0.07990330
## Gears          0.06310386 -0.005363947  0.01502333  0.20947715  0.01462935
## Quarterly_Tax  0.21919691 -0.198430508  0.27816470 -0.29843172  0.30699580
## Weight         0.58119759 -0.470253184 -0.02859846  0.08961406  0.33563740
##                     Doors        Gears Quarterly_Tax      Weight
## Price          0.18532555  0.063103857   0.219196911  0.58119759
## Age_08_04     -0.14835921 -0.005363947  -0.198430508 -0.47025318
## KM            -0.03619661  0.015023328   0.278164697 -0.02859846
## HP             0.09242450  0.209477146  -0.298431717  0.08961406
## cc             0.07990330  0.014629352   0.306995798  0.33563740
## Doors          1.00000000 -0.160141430   0.109363225  0.30261764
## Gears         -0.16014143  1.000000000  -0.005451955  0.02061328
## Quarterly_Tax  0.10936323 -0.005451955   1.000000000  0.62613373
## Weight         0.30261764  0.020613284   0.626133733  1.00000000
#cor2pcor(cor(dataQ1))
# Full model: Price regressed on all eight predictors, fitted on every row.
# `data = dataQ1` makes the source of the variables explicit instead of
# relying on attach()ed columns, matching how modelQ1b / modelQ1c are fitted.
modelQ1 <- lm(
  Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + Quarterly_Tax + Weight,
  data = dataQ1
)
summary(modelQ1)
## 
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + 
##     Quarterly_Tax + Weight)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9366.4  -793.3   -21.3   799.7  6444.0 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -5.573e+03  1.411e+03  -3.949 8.24e-05 ***
## Age_08_04     -1.217e+02  2.616e+00 -46.512  < 2e-16 ***
## KM            -2.082e-02  1.252e-03 -16.622  < 2e-16 ***
## HP             3.168e+01  2.818e+00  11.241  < 2e-16 ***
## cc            -1.211e-01  9.009e-02  -1.344  0.17909    
## Doors         -1.617e+00  4.001e+01  -0.040  0.96777    
## Gears          5.943e+02  1.971e+02   3.016  0.00261 ** 
## Quarterly_Tax  3.949e+00  1.310e+00   3.015  0.00262 ** 
## Weight         1.696e+01  1.068e+00  15.880  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1342 on 1427 degrees of freedom
## Multiple R-squared:  0.8638, Adjusted R-squared:  0.863 
## F-statistic:  1131 on 8 and 1427 DF,  p-value: < 2.2e-16
# Reduced model with only cc and Doors, the two predictors whose coefficients
# were not significant in the full-model summary, to see how they behave
# without the other predictors.
# `data = dataQ1` avoids depending on attach()ed columns.
modelQ1a <- lm(Price ~ cc + Doors, data = dataQ1)

summary(modelQ1a)
## 
## Call:
## lm(formula = Price ~ cc + Doors)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7243.9 -2273.6  -821.3  1054.4 20714.1 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6509.4211   515.7732  12.621  < 2e-16 ***
## cc             0.9597     0.2211   4.340 1.52e-05 ***
## Doors        671.3973    98.5009   6.816 1.37e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3543 on 1433 degrees of freedom
## Multiple R-squared:  0.04688,    Adjusted R-squared:  0.04555 
## F-statistic: 35.24 on 2 and 1433 DF,  p-value: 1.15e-15
#install.packages("car")
#library(car)
#install.packages("carData")
# Index plots for influence measures
#influence.measures(modelQ1)

#influenceIndexPlot(modelQ1)
#influencePlot(modelQ1,id.n=3)
# Same specification as the full model, refitted with observation 81 dropped.
# NOTE(review): row 81 was presumably flagged by the (commented-out) influence
# diagnostics above -- confirm before relying on this exclusion.
modelQ1b <- lm(
  formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears +
    Quarterly_Tax + Weight,
  data = MLRQ1[-81, ]
)

summary(object = modelQ1b)
## 
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + 
##     Quarterly_Tax + Weight, data = MLRQ1[-81, ])
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11455.7   -761.7    -32.7    739.3   6739.7 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -6.285e+03  1.383e+03  -4.545 5.95e-06 ***
## Age_08_04     -1.205e+02  2.562e+00 -47.021  < 2e-16 ***
## KM            -1.785e-02  1.277e-03 -13.973  < 2e-16 ***
## HP             3.935e+01  2.911e+00  13.516  < 2e-16 ***
## cc            -2.524e+00  3.072e-01  -8.216 4.67e-16 ***
## Doors         -2.723e+01  3.924e+01  -0.694  0.48788    
## Gears          5.239e+02  1.929e+02   2.717  0.00667 ** 
## Quarterly_Tax  9.044e+00  1.425e+00   6.348 2.93e-10 ***
## Weight         2.017e+01  1.116e+00  18.076  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1313 on 1426 degrees of freedom
## Multiple R-squared:  0.8694, Adjusted R-squared:  0.8686 
## F-statistic:  1186 on 8 and 1426 DF,  p-value: < 2.2e-16
# Same specification again, now with observations 81, 222 and 961 dropped.
# NOTE(review): these rows are presumably the influential points identified by
# the (commented-out) influence plots -- confirm.
modelQ1c <- lm(
  formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears +
    Quarterly_Tax + Weight,
  data = MLRQ1[-c(81, 222, 961), ]
)

summary(object = modelQ1c)
## 
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + 
##     Quarterly_Tax + Weight, data = MLRQ1[-c(81, 222, 961), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8756.8  -761.3   -31.7   720.6  6306.6 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.474e+04  1.433e+03 -10.289  < 2e-16 ***
## Age_08_04     -1.120e+02  2.479e+00 -45.185  < 2e-16 ***
## KM            -1.699e-02  1.200e-03 -14.160  < 2e-16 ***
## HP             3.661e+01  2.745e+00  13.334  < 2e-16 ***
## cc            -3.795e+00  3.021e-01 -12.562  < 2e-16 ***
## Doors         -1.225e+02  3.748e+01  -3.270  0.00110 ** 
## Gears          4.650e+02  1.810e+02   2.569  0.01029 *  
## Quarterly_Tax  5.213e+00  1.371e+00   3.802  0.00015 ***
## Weight         3.064e+01  1.290e+00  23.748  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1231 on 1424 degrees of freedom
## Multiple R-squared:  0.8852, Adjusted R-squared:  0.8845 
## F-statistic:  1372 on 8 and 1424 DF,  p-value: < 2.2e-16
#vif(modelQ1)

# Variance inflation factor (VIF) to check for collinearity between predictors.

# A VIF above 10 for a predictor indicates that it is strongly collinear with
# the other predictors.
#avPlots(modelQ1)




## Added-variable plots check the relationship between each predictor and the output variable
# Final model: all eight predictors, fitted without observations 81, 222 and
# 961. This is the same formula and the same row exclusions as modelQ1c.
FinalModelQ1 <- lm(
  formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears +
    Quarterly_Tax + Weight,
  data = MLRQ1[-c(81, 222, 961), ]
)
summary(object = FinalModelQ1)
## 
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + 
##     Quarterly_Tax + Weight, data = MLRQ1[-c(81, 222, 961), ])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8756.8  -761.3   -31.7   720.6  6306.6 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -1.474e+04  1.433e+03 -10.289  < 2e-16 ***
## Age_08_04     -1.120e+02  2.479e+00 -45.185  < 2e-16 ***
## KM            -1.699e-02  1.200e-03 -14.160  < 2e-16 ***
## HP             3.661e+01  2.745e+00  13.334  < 2e-16 ***
## cc            -3.795e+00  3.021e-01 -12.562  < 2e-16 ***
## Doors         -1.225e+02  3.748e+01  -3.270  0.00110 ** 
## Gears          4.650e+02  1.810e+02   2.569  0.01029 *  
## Quarterly_Tax  5.213e+00  1.371e+00   3.802  0.00015 ***
## Weight         3.064e+01  1.290e+00  23.748  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1231 on 1424 degrees of freedom
## Multiple R-squared:  0.8852, Adjusted R-squared:  0.8845 
## F-statistic:  1372 on 8 and 1424 DF,  p-value: < 2.2e-16
# Evaluate the model against the LINE assumptions
# (Linearity, Independence, Normality, Equal variance).

# plot() on an lm fit draws the standard regression diagnostics one after
# another: residuals vs fitted, normal Q-Q, scale-location, and residuals vs
# leverage (with Cook's distance contours).
plot(x = FinalModelQ1)
#qqPlot(modelQ1,id.n=5)
# A QQ plot of studentized residuals helps in identifying outliers

#install.packages("caret")
#install.packages("lattice")
#install.packages("ggplot2")
#library(MASS)

#x<- stepAIC(modelQ1)