Ramya
November 6, 2017
# Load the car-seat data and inspect its structure.
# The file is read by its full path, so the session working directory is left
# untouched (no setwd()). stringsAsFactors = TRUE keeps ShelveLoc, Urban and US
# as factors on every R version (the read.csv default became FALSE in R 4.0.0).
cars <- read.csv(
  file.path("~/Desktop/ACADS/Term 5/DAM", "CarSeatsDataV5.csv"),
  stringsAsFactors = TRUE
)
str(cars)
'data.frame': 400 obs. of 13 variables:
$ Sales : num 9.5 4.15 10.81 9.01 10.14 ...
$ CompPrice : int 138 141 124 121 145 103 104 130 119 157 ...
$ Income : int 73 64 113 78 119 74 99 60 98 53 ...
$ Advertising: int 11 3 13 9 16 0 15 0 0 0 ...
$ Population : int 276 340 501 150 294 359 226 144 18 403 ...
$ Price : int 120 128 72 100 113 97 102 138 126 124 ...
$ ShelveLoc : Factor w/ 3 levels "0-Bad","1-Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Age : int 42 38 78 26 42 55 58 38 73 58 ...
$ Education : int 17 13 16 10 12 11 17 10 17 16 ...
$ Urban : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 2 2 1 1 2 ...
$ US : Factor w/ 2 levels "No","Yes": 2 1 2 2 2 2 2 1 1 1 ...
$ Revenue : num 1140 531 778 901 1146 ...
$ Profit : num 228 106 156 180 229 ...
# Model 0: simple linear regression of Profit on Advertising alone.
# NOTE(review): the cars$ prefixes are kept so the printed call and coefficient
# names below stay as shown; lm(Profit ~ Advertising, data = cars) is the form
# to prefer if predict() on new data is ever needed.
Model0 <- lm(formula = cars$Profit ~ cars$Advertising)
summary(Model0)
Call:
lm(formula = cars$Profit ~ cars$Advertising)
Residuals:
Min 1Q Median 3Q Max
-171.85 -34.38 -3.78 35.97 168.85
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 147.3290 4.0320 36.540 < 2e-16 ***
cars$Advertising 3.0660 0.4295 7.139 4.49e-12 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 57.05 on 398 degrees of freedom
Multiple R-squared: 0.1135, Adjusted R-squared: 0.1113
F-statistic: 50.97 on 1 and 398 DF, p-value: 4.493e-12
# Model 1: Profit explained by shelf location only (ShelveLoc is a 3-level
# factor, so lm() fits two dummy coefficients against the first level).
Model1 <- lm(formula = cars$Profit ~ cars$ShelveLoc)
summary(Model1)
Call:
lm(formula = cars$Profit ~ cars$ShelveLoc)
Residuals:
Min 1Q Median 3Q Max
-163.350 -33.330 -1.365 31.033 153.050
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 120.041 4.808 24.97 < 2e-16 ***
cars$ShelveLoc1-Medium 43.309 5.767 7.51 3.93e-13 ***
cars$ShelveLoc2-Good 112.558 7.016 16.04 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 47.11 on 397 degrees of freedom
Multiple R-squared: 0.3971, Adjusted R-squared: 0.394
F-statistic: 130.7 on 2 and 397 DF, p-value: < 2.2e-16
# Model 2: additive model with both Advertising and ShelveLoc as predictors.
Model2 <- lm(formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc)
summary(Model2)
Call:
lm(formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc)
Residuals:
Min 1Q Median 3Q Max
-145.446 -25.160 0.039 24.796 104.054
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 103.014 4.886 21.085 < 2e-16 ***
cars$Advertising 2.738 0.328 8.347 1.18e-15 ***
cars$ShelveLoc1-Medium 42.433 5.325 7.968 1.73e-14 ***
cars$ShelveLoc2-Good 109.453 6.489 16.867 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 43.5 on 396 degrees of freedom
Multiple R-squared: 0.4873, Adjusted R-squared: 0.4834
F-statistic: 125.4 on 3 and 396 DF, p-value: < 2.2e-16
# Model 3: Model 2 plus the Advertising x ShelveLoc interaction, i.e. the
# Advertising slope is allowed to differ by shelf location.
Model3 <- lm(
  formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc +
    cars$Advertising:cars$ShelveLoc
)
summary(Model3)
Call:
lm(formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc +
cars$Advertising:cars$ShelveLoc)
Residuals:
Min 1Q Median 3Q Max
-143.299 -24.554 1.032 24.480 106.201
Coefficients:
Estimate Std. Error t value
(Intercept) 108.6051 6.1743 17.590
cars$Advertising 1.8390 0.6903 2.664
cars$ShelveLoc1-Medium 34.6937 7.4206 4.675
cars$ShelveLoc2-Good 103.1563 9.3093 11.081
cars$Advertising:cars$ShelveLoc1-Medium 1.2276 0.8190 1.499
cars$Advertising:cars$ShelveLoc2-Good 0.9950 0.9812 1.014
Pr(>|t|)
(Intercept) < 2e-16 ***
cars$Advertising 0.00803 **
cars$ShelveLoc1-Medium 4.04e-06 ***
cars$ShelveLoc2-Good < 2e-16 ***
cars$Advertising:cars$ShelveLoc1-Medium 0.13472
cars$Advertising:cars$ShelveLoc2-Good 0.31118
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 43.48 on 394 degrees of freedom
Multiple R-squared: 0.4902, Adjusted R-squared: 0.4837
F-statistic: 75.77 on 5 and 394 DF, p-value: < 2.2e-16
# Model 4: additive model on Advertising, ShelveLoc and the remaining store and
# demographic columns. Sales, Price and Revenue are not used as predictors, and
# no interaction term is included.
Model4 <- lm(
  formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc +
    cars$CompPrice + cars$Income + cars$Population + cars$Age +
    cars$Education + cars$Urban + cars$US
)
summary(Model4)
Call:
lm(formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc +
cars$CompPrice + cars$Income + cars$Population + cars$Age +
cars$Education + cars$Urban + cars$US)
Residuals:
Min 1Q Median 3Q Max
-159.654 -17.856 2.075 20.401 68.740
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -60.136334 18.171281 -3.309 0.00102 **
cars$Advertising 2.959638 0.334864 8.838 < 2e-16 ***
cars$ShelveLoc1-Medium 44.625673 3.797403 11.752 < 2e-16 ***
cars$ShelveLoc2-Good 109.212841 4.608355 23.699 < 2e-16 ***
cars$CompPrice 1.563453 0.101946 15.336 < 2e-16 ***
cars$Income 0.370657 0.055560 6.671 8.73e-11 ***
cars$Population 0.005803 0.011149 0.521 0.60299
cars$Age -0.952238 0.095685 -9.952 < 2e-16 ***
cars$Education -0.679343 0.593883 -1.144 0.25337
cars$UrbanYes 2.981043 3.402003 0.876 0.38143
cars$USYes -5.783463 4.511614 -1.282 0.20064
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 30.69 on 389 degrees of freedom
Multiple R-squared: 0.7493, Adjusted R-squared: 0.7429
F-statistic: 116.3 on 10 and 389 DF, p-value: < 2.2e-16
# Side-by-side comparison of the five models, with values transcribed from the
# summary() outputs above.
# Adjusted R-squared is reported for every model: multiple R-squared never
# decreases when predictors are added, so it cannot fairly compare models of
# different size, and the two measures must not be mixed in one column.
# Overall F-test p-values that summary() prints as "< 2.2e-16" are kept as that
# bound rather than recorded as an exact value.
# The first element of each vector is a header label stored as data, so every
# column of `x` holds text rather than numbers.
c1 <- c("Feature", "Model 0", "Model 1", "Model 2", "Model 3", "Model 4")
c2 <- c("Adjusted R-Squared", 0.1113, 0.394, 0.4834, 0.4837, 0.7429)
c3 <- c("p-value", "4.493e-12", "< 2.2e-16", "< 2.2e-16", "< 2.2e-16", "< 2.2e-16")
x <- data.frame(c1, c2, c3)
x
c1 c2 c3
1 Feature Adjusted R-Squared p-value
2 Model 0 0.1113 4.493e-12
3 Model 1 0.394 < 2.2e-16
4 Model 2 0.4834 < 2.2e-16
5 Model 3 0.4837 < 2.2e-16
6 Model 4 0.7429 < 2.2e-16
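Model 4 is the best of the five models: its adjusted R-squared (0.7429) is well above that of Models 0-3 (0.1113 to 0.4837), and its overall F-test is highly significant (p < 2.2e-16).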