# Load the Toyota Corolla listings into a data frame.
# NOTE: the path below is specific to one machine; adjust it before running
# the script anywhere else.
# stringsAsFactors = TRUE is passed explicitly: since R 4.0.0 read.csv() keeps
# text columns as character by default, while the output recorded in this
# script (Model, Fuel_Type and Color reported as factors) relies on the
# conversion to factors.
toyota <- read.csv(
  "C:/Users/Shalini/Downloads/ToyotaCorolla.csv",
  stringsAsFactors = TRUE
)
#View(toyota)
# List the column names to see which variables are available.
colnames(toyota)
## [1] "Id" "Model" "Price"
## [4] "Age_08_04" "Mfg_Month" "Mfg_Year"
## [7] "KM" "Fuel_Type" "HP"
## [10] "Met_Color" "Color" "Automatic"
## [13] "cc" "Doors" "Cylinders"
## [16] "Gears" "Quarterly_Tax" "Weight"
## [19] "Mfr_Guarantee" "BOVAG_Guarantee" "Guarantee_Period"
## [22] "ABS" "Airbag_1" "Airbag_2"
## [25] "Airco" "Automatic_airco" "Boardcomputer"
## [28] "CD_Player" "Central_Lock" "Powered_Windows"
## [31] "Power_Steering" "Radio" "Mistlamps"
## [34] "Sport_Model" "Backseat_Divider" "Metallic_Rim"
## [37] "Radio_cassette" "Tow_Bar"
# Scatter plot with Price on the horizontal axis and Age_08_04 on the
# vertical axis.
plot(x = toyota$Price, y = toyota$Age_08_04)

# Print per-column summaries for the whole data frame: quantiles and mean for
# numeric columns, level counts for factor columns.
summary(toyota)
## Id
## Min. : 1.0
## 1st Qu.: 361.8
## Median : 721.5
## Mean : 721.6
## 3rd Qu.:1081.2
## Max. :1442.0
##
## Model
## TOYOTA Corolla 1.6 16V HATCHB LINEA TERRA 2/3-Doors: 107
## TOYOTA Corolla 1.3 16V HATCHB LINEA TERRA 2/3-Doors: 83
## TOYOTA Corolla 1.6 16V LIFTB LINEA LUNA 4/5-Doors : 79
## TOYOTA Corolla 1.6 16V LIFTB LINEA TERRA 4/5-Doors : 70
## TOYOTA Corolla 1.6 16V SEDAN LINEA TERRA 4/5-Doors : 43
## TOYOTA Corolla 1.4 16V VVT I HATCHB TERRA 2/3-Doors: 42
## (Other) :1012
## Price Age_08_04 Mfg_Month Mfg_Year
## Min. : 4350 Min. : 1.00 Min. : 1.000 Min. :1998
## 1st Qu.: 8450 1st Qu.:44.00 1st Qu.: 3.000 1st Qu.:1998
## Median : 9900 Median :61.00 Median : 5.000 Median :1999
## Mean :10731 Mean :55.95 Mean : 5.549 Mean :2000
## 3rd Qu.:11950 3rd Qu.:70.00 3rd Qu.: 8.000 3rd Qu.:2001
## Max. :32500 Max. :80.00 Max. :12.000 Max. :2004
##
## KM Fuel_Type HP Met_Color
## Min. : 1 CNG : 17 Min. : 69.0 Min. :0.0000
## 1st Qu.: 43000 Diesel: 155 1st Qu.: 90.0 1st Qu.:0.0000
## Median : 63390 Petrol:1264 Median :110.0 Median :1.0000
## Mean : 68533 Mean :101.5 Mean :0.6748
## 3rd Qu.: 87021 3rd Qu.:110.0 3rd Qu.:1.0000
## Max. :243000 Max. :192.0 Max. :1.0000
##
## Color Automatic cc Doors
## Grey :301 Min. :0.00000 Min. : 1300 Min. :2.000
## Blue :283 1st Qu.:0.00000 1st Qu.: 1400 1st Qu.:3.000
## Red :278 Median :0.00000 Median : 1600 Median :4.000
## Green :220 Mean :0.05571 Mean : 1577 Mean :4.033
## Black :191 3rd Qu.:0.00000 3rd Qu.: 1600 3rd Qu.:5.000
## Silver :122 Max. :1.00000 Max. :16000 Max. :5.000
## (Other): 41
## Cylinders Gears Quarterly_Tax Weight
## Min. :4 Min. :3.000 Min. : 19.00 Min. :1000
## 1st Qu.:4 1st Qu.:5.000 1st Qu.: 69.00 1st Qu.:1040
## Median :4 Median :5.000 Median : 85.00 Median :1070
## Mean :4 Mean :5.026 Mean : 87.12 Mean :1072
## 3rd Qu.:4 3rd Qu.:5.000 3rd Qu.: 85.00 3rd Qu.:1085
## Max. :4 Max. :6.000 Max. :283.00 Max. :1615
##
## Mfr_Guarantee BOVAG_Guarantee Guarantee_Period ABS
## Min. :0.0000 Min. :0.0000 Min. : 3.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.: 3.000 1st Qu.:1.0000
## Median :0.0000 Median :1.0000 Median : 3.000 Median :1.0000
## Mean :0.4095 Mean :0.8955 Mean : 3.815 Mean :0.8134
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 3.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :36.000 Max. :1.0000
##
## Airbag_1 Airbag_2 Airco Automatic_airco
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :1.0000 Median :1.0000 Median :1.0000 Median :0.00000
## Mean :0.9708 Mean :0.7228 Mean :0.5084 Mean :0.05641
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
##
## Boardcomputer CD_Player Central_Lock Powered_Windows
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :1.000
## Mean :0.2946 Mean :0.2187 Mean :0.5801 Mean :0.562
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
##
## Power_Steering Radio Mistlamps Sport_Model
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.9777 Mean :0.1462 Mean :0.257 Mean :0.3001
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
##
## Backseat_Divider Metallic_Rim Radio_cassette Tow_Bar
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.7702 Mean :0.2047 Mean :0.1455 Mean :0.2779
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
##
# Sample variance of the asking price.
var(toyota[["Price"]])
## [1] 13154872
# Sample standard deviation of the number of doors.
sd(toyota[["Doors"]])
## [1] 0.9526766
# Show the structure of the data frame: dimensions, column types and the
# first few values of each column.
str(toyota)
## 'data.frame': 1436 obs. of 38 variables:
## $ Id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Model : Factor w/ 372 levels "Â TOYOTA Corolla 1.3 16V HATCHB G6 2/3-Doors",..: 332 332 67 332 331 331 64 326 62 59 ...
## $ Price : int 13500 13750 13950 14950 13750 12950 16900 18600 21500 12950 ...
## $ Age_08_04 : int 23 23 24 26 30 32 27 30 27 23 ...
## $ Mfg_Month : int 10 10 9 7 3 1 6 3 6 10 ...
## $ Mfg_Year : int 2002 2002 2002 2002 2002 2002 2002 2002 2002 2002 ...
## $ KM : int 46986 72937 41711 48000 38500 61000 94612 75889 19700 71138 ...
## $ Fuel_Type : Factor w/ 3 levels "CNG","Diesel",..: 2 2 2 2 2 2 2 2 3 2 ...
## $ HP : int 90 90 90 90 90 90 90 90 192 69 ...
## $ Met_Color : int 1 1 1 0 0 0 1 1 0 0 ...
## $ Color : Factor w/ 10 levels "Beige","Black",..: 3 7 3 2 2 9 5 5 6 3 ...
## $ Automatic : int 0 0 0 0 0 0 0 0 0 0 ...
## $ cc : int 2000 2000 2000 2000 2000 2000 2000 2000 1800 1900 ...
## $ Doors : int 3 3 3 3 3 3 3 3 3 3 ...
## $ Cylinders : int 4 4 4 4 4 4 4 4 4 4 ...
## $ Gears : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Quarterly_Tax : int 210 210 210 210 210 210 210 210 100 185 ...
## $ Weight : int 1165 1165 1165 1165 1170 1170 1245 1245 1185 1105 ...
## $ Mfr_Guarantee : int 0 0 1 1 1 0 0 1 0 0 ...
## $ BOVAG_Guarantee : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Guarantee_Period: int 3 3 3 3 3 3 3 3 3 3 ...
## $ ABS : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Airbag_1 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Airbag_2 : int 1 1 1 1 1 1 1 1 0 1 ...
## $ Airco : int 0 1 0 0 1 1 1 1 1 1 ...
## $ Automatic_airco : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Boardcomputer : int 1 1 1 1 1 1 1 1 0 1 ...
## $ CD_Player : int 0 1 0 0 0 0 0 1 0 0 ...
## $ Central_Lock : int 1 1 0 0 1 1 1 1 1 0 ...
## $ Powered_Windows : int 1 0 0 0 1 1 1 1 1 0 ...
## $ Power_Steering : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Radio : int 0 0 0 0 0 0 0 0 1 0 ...
## $ Mistlamps : int 0 0 0 0 1 1 0 0 0 0 ...
## $ Sport_Model : int 0 0 0 0 0 0 1 0 0 0 ...
## $ Backseat_Divider: int 1 1 1 1 1 1 1 1 0 1 ...
## $ Metallic_Rim : int 0 0 0 0 0 0 0 0 1 0 ...
## $ Radio_cassette : int 0 0 0 0 0 0 0 0 1 0 ...
## $ Tow_Bar : int 0 0 0 0 0 0 0 0 0 0 ...
# Pairwise correlation matrix for Doors, HP and Gears.
cor(toyota[, c("Doors", "HP", "Gears")])
## Doors HP Gears
## Doors 1.0000000 0.0924245 -0.1601414
## HP 0.0924245 1.0000000 0.2094771
## Gears -0.1601414 0.2094771 1.0000000
# Keep the response (Price) together with the eight predictors used in the
# regression below.
Corolla <- toyota[, c(
  "Price", "Age_08_04", "KM", "HP", "cc",
  "Doors", "Gears", "Quarterly_Tax", "Weight"
)]
# Linear model of Price on every predictor kept above.
model <- lm(
  Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + Quarterly_Tax + Weight,
  data = Corolla
)
# Coefficient table, residual summary and R-squared for the fit.
summary(model)
##
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + cc + Doors + Gears +
## Quarterly_Tax + Weight, data = Corolla)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9366.4 -793.3 -21.3 799.7 6444.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.573e+03 1.411e+03 -3.949 8.24e-05 ***
## Age_08_04 -1.217e+02 2.616e+00 -46.512 < 2e-16 ***
## KM -2.082e-02 1.252e-03 -16.622 < 2e-16 ***
## HP 3.168e+01 2.818e+00 11.241 < 2e-16 ***
## cc -1.211e-01 9.009e-02 -1.344 0.17909
## Doors -1.617e+00 4.001e+01 -0.040 0.96777
## Gears 5.943e+02 1.971e+02 3.016 0.00261 **
## Quarterly_Tax 3.949e+00 1.310e+00 3.015 0.00262 **
## Weight 1.696e+01 1.068e+00 15.880 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1342 on 1427 degrees of freedom
## Multiple R-squared: 0.8638, Adjusted R-squared: 0.863
## F-statistic: 1131 on 8 and 1427 DF, p-value: < 2.2e-16
# Draw the diagnostic plots that plot() produces for the fitted lm object.
plot(model)




library(car)
## Loading required package: carData
# Added-variable (partial regression) plots from the car package for the
# terms of the fitted model.
avPlots(model)

library(MASS)
# Stepwise model selection by AIC, starting from the full model. No scope is
# supplied, so the search only considers dropping terms; the trace and the
# finally selected model are printed.
stepAIC(model)
## Start: AIC=20693.89
## Price ~ Age_08_04 + KM + HP + cc + Doors + Gears + Quarterly_Tax +
## Weight
##
## Df Sum of Sq RSS AIC
## - Doors 1 2943 2571786477 20692
## - cc 1 3256511 2575040045 20694
## <none> 2571783534 20694
## - Quarterly_Tax 1 16377633 2588161166 20701
## - Gears 1 16393629 2588177163 20701
## - HP 1 227730786 2799514319 20814
## - Weight 1 454465243 3026248777 20926
## - KM 1 497917334 3069700867 20946
## - Age_08_04 1 3898860600 6470644134 22017
##
## Step: AIC=20691.89
## Price ~ Age_08_04 + KM + HP + cc + Gears + Quarterly_Tax + Weight
##
## Df Sum of Sq RSS AIC
## - cc 1 3254209 2575040686 20692
## <none> 2571786477 20692
## - Quarterly_Tax 1 16503849 2588290326 20699
## - Gears 1 17093855 2588880332 20699
## - HP 1 228761929 2800548406 20812
## - Weight 1 484447009 3056233485 20938
## - KM 1 498427860 3070214337 20944
## - Age_08_04 1 3898877516 6470663993 22015
##
## Step: AIC=20691.7
## Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax + Weight
##
## Df Sum of Sq RSS AIC
## <none> 2575040686 20692
## - Quarterly_Tax 1 14976762 2590017448 20698
## - Gears 1 17276597 2592317283 20699
## - HP 1 225684613 2800725299 20810
## - Weight 1 484245502 3059286188 20937
## - KM 1 506728527 3081769213 20948
## - Age_08_04 1 3902107988 6477148674 22014
##
## Call:
## lm(formula = Price ~ Age_08_04 + KM + HP + Gears + Quarterly_Tax +
## Weight, data = Corolla)
##
## Coefficients:
## (Intercept) Age_08_04 KM HP Gears
## -5.478e+03 -1.217e+02 -2.094e-02 3.133e+01 5.990e+02
## Quarterly_Tax Weight
## 3.737e+00 1.673e+01