Session 13

Ramya
November 6, 2017

List the data type of each column

# Load the car-seat sales data and show the type of every column.
# NOTE(review): setwd() ties this script to one machine's directory layout;
# it is kept so the working directory behaves as before, but a
# project-relative path would be more portable.
setwd("~/Desktop/ACADS/Term 5/DAM")
# stringsAsFactors = TRUE is spelled out because its default changed to FALSE
# in R 4.0.0; the str() output below shows ShelveLoc, Urban and US as Factors.
cars <- read.csv("CarSeatsDataV5.csv", stringsAsFactors = TRUE)
str(cars)
'data.frame':   400 obs. of  13 variables:
 $ Sales      : num  9.5 4.15 10.81 9.01 10.14 ...
 $ CompPrice  : int  138 141 124 121 145 103 104 130 119 157 ...
 $ Income     : int  73 64 113 78 119 74 99 60 98 53 ...
 $ Advertising: int  11 3 13 9 16 0 15 0 0 0 ...
 $ Population : int  276 340 501 150 294 359 226 144 18 403 ...
 $ Price      : int  120 128 72 100 113 97 102 138 126 124 ...
 $ ShelveLoc  : Factor w/ 3 levels "0-Bad","1-Medium",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Age        : int  42 38 78 26 42 55 58 38 73 58 ...
 $ Education  : int  17 13 16 10 12 11 17 10 17 16 ...
 $ Urban      : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 2 2 1 1 2 ...
 $ US         : Factor w/ 2 levels "No","Yes": 2 1 2 2 2 2 2 1 1 1 ...
 $ Revenue    : num  1140 531 778 901 1146 ...
 $ Profit     : num  228 106 156 180 229 ...

Model 0

# Model 0: regress Profit on the Advertising column only.
Model0 <- lm(formula = cars$Profit ~ cars$Advertising)
summary(Model0)

Call:
lm(formula = cars$Profit ~ cars$Advertising)

Residuals:
    Min      1Q  Median      3Q     Max 
-171.85  -34.38   -3.78   35.97  168.85 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      147.3290     4.0320  36.540  < 2e-16 ***
cars$Advertising   3.0660     0.4295   7.139 4.49e-12 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 57.05 on 398 degrees of freedom
Multiple R-squared:  0.1135,    Adjusted R-squared:  0.1113 
F-statistic: 50.97 on 1 and 398 DF,  p-value: 4.493e-12

Model 1

# Model 1: regress Profit on the ShelveLoc factor only.
Model1 <- lm(formula = cars$Profit ~ cars$ShelveLoc)
summary(Model1)

Call:
lm(formula = cars$Profit ~ cars$ShelveLoc)

Residuals:
     Min       1Q   Median       3Q      Max 
-163.350  -33.330   -1.365   31.033  153.050 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)             120.041      4.808   24.97  < 2e-16 ***
cars$ShelveLoc1-Medium   43.309      5.767    7.51 3.93e-13 ***
cars$ShelveLoc2-Good    112.558      7.016   16.04  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 47.11 on 397 degrees of freedom
Multiple R-squared:  0.3971,    Adjusted R-squared:  0.394 
F-statistic: 130.7 on 2 and 397 DF,  p-value: < 2.2e-16

Model 2

# Model 2: additive model with both Advertising and ShelveLoc as predictors.
Model2 <- lm(formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc)
summary(Model2)

Call:
lm(formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc)

Residuals:
     Min       1Q   Median       3Q      Max 
-145.446  -25.160    0.039   24.796  104.054 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)             103.014      4.886  21.085  < 2e-16 ***
cars$Advertising          2.738      0.328   8.347 1.18e-15 ***
cars$ShelveLoc1-Medium   42.433      5.325   7.968 1.73e-14 ***
cars$ShelveLoc2-Good    109.453      6.489  16.867  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 43.5 on 396 degrees of freedom
Multiple R-squared:  0.4873,    Adjusted R-squared:  0.4834 
F-statistic: 125.4 on 3 and 396 DF,  p-value: < 2.2e-16

Model 3

# Model 3: Model 2's main effects plus the Advertising-by-ShelveLoc
# interaction term.
Model3 <- lm(
  formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc +
    cars$Advertising:cars$ShelveLoc
)
summary(Model3)

Call:
lm(formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc + 
    cars$Advertising:cars$ShelveLoc)

Residuals:
     Min       1Q   Median       3Q      Max 
-143.299  -24.554    1.032   24.480  106.201 

Coefficients:
                                        Estimate Std. Error t value
(Intercept)                             108.6051     6.1743  17.590
cars$Advertising                          1.8390     0.6903   2.664
cars$ShelveLoc1-Medium                   34.6937     7.4206   4.675
cars$ShelveLoc2-Good                    103.1563     9.3093  11.081
cars$Advertising:cars$ShelveLoc1-Medium   1.2276     0.8190   1.499
cars$Advertising:cars$ShelveLoc2-Good     0.9950     0.9812   1.014
                                        Pr(>|t|)    
(Intercept)                              < 2e-16 ***
cars$Advertising                         0.00803 ** 
cars$ShelveLoc1-Medium                  4.04e-06 ***
cars$ShelveLoc2-Good                     < 2e-16 ***
cars$Advertising:cars$ShelveLoc1-Medium  0.13472    
cars$Advertising:cars$ShelveLoc2-Good    0.31118    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 43.48 on 394 degrees of freedom
Multiple R-squared:  0.4902,    Adjusted R-squared:  0.4837 
F-statistic: 75.77 on 5 and 394 DF,  p-value: < 2.2e-16

Model 4

# Model 4: Advertising and ShelveLoc plus the remaining store-level columns
# (CompPrice, Income, Population, Age, Education, Urban, US); no interactions.
Model4 <- lm(
  formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc +
    cars$CompPrice + cars$Income + cars$Population + cars$Age +
    cars$Education + cars$Urban + cars$US
)
summary(Model4)

Call:
lm(formula = cars$Profit ~ cars$Advertising + cars$ShelveLoc + 
    cars$CompPrice + cars$Income + cars$Population + cars$Age + 
    cars$Education + cars$Urban + cars$US)

Residuals:
     Min       1Q   Median       3Q      Max 
-159.654  -17.856    2.075   20.401   68.740 

Coefficients:
                         Estimate Std. Error t value Pr(>|t|)    
(Intercept)            -60.136334  18.171281  -3.309  0.00102 ** 
cars$Advertising         2.959638   0.334864   8.838  < 2e-16 ***
cars$ShelveLoc1-Medium  44.625673   3.797403  11.752  < 2e-16 ***
cars$ShelveLoc2-Good   109.212841   4.608355  23.699  < 2e-16 ***
cars$CompPrice           1.563453   0.101946  15.336  < 2e-16 ***
cars$Income              0.370657   0.055560   6.671 8.73e-11 ***
cars$Population          0.005803   0.011149   0.521  0.60299    
cars$Age                -0.952238   0.095685  -9.952  < 2e-16 ***
cars$Education          -0.679343   0.593883  -1.144  0.25337    
cars$UrbanYes            2.981043   3.402003   0.876  0.38143    
cars$USYes              -5.783463   4.511614  -1.282  0.20064    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 30.69 on 389 degrees of freedom
Multiple R-squared:  0.7493,    Adjusted R-squared:  0.7429 
F-statistic: 116.3 on 10 and 389 DF,  p-value: < 2.2e-16

Synopsis of models

# Side-by-side summary of the five models, transcribed from the summary()
# outputs above.
# Adjusted R-squared is used for every model so the column is comparable
# across models with different numbers of predictors (the earlier version
# mixed multiple and adjusted R-squared).
model_names <- c("Model 0", "Model 1", "Model 2", "Model 3", "Model 4")
adj_r_squared <- c(0.1113, 0.3940, 0.4834, 0.4837, 0.7429)
# Overall F-test p-values are kept as text because summary() reports four of
# them only as an upper bound ("< 2.2e-16"), not as an exact number.
f_test_p_value <- c(
  "4.493e-12", "< 2.2e-16", "< 2.2e-16", "< 2.2e-16", "< 2.2e-16"
)
# Column labels belong in the data frame's names, not in its first row.
x <- data.frame(
  Model = model_names,
  Adj.R.Squared = adj_r_squared,
  p.value = f_test_p_value
)
x
    Model Adj.R.Squared   p.value
1 Model 0        0.1113 4.493e-12
2 Model 1        0.3940 < 2.2e-16
3 Model 2        0.4834 < 2.2e-16
4 Model 3        0.4837 < 2.2e-16
5 Model 4        0.7429 < 2.2e-16

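Model 4 is the best of the five models: it has the highest adjusted R-squared (0.7429) and the lowest residual standard error (30.69).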