Session14

# Load the advertising data set and preview its first rows.
# stringsAsFactors = TRUE is spelled out because R >= 4.0 no longer converts
# strings to factors by default; the rendered output in this file shows
# adType and restaurantType printed with "Levels:", i.e. as factors.
x <- read.csv("AdvertisingDataV2.csv", stringsAsFactors = TRUE)

head(x)

##   adType pageViews phoneCalls reservations businessID restaurantType
## 1 No Ads       643         44           39          1          chain
## 2 No Ads       621         41           44          2          chain
## 3 No Ads       581         40           38          3          chain
## 4 No Ads       592         35           31          4          chain
## 5 No Ads       648         45           46          5          chain
## 6 No Ads       519         37           41          6          chain

# Distinct advertising conditions present in the data.
unique(x[["adType"]])

## [1] No Ads   Curr Ads New Ads 
## Levels: Curr Ads New Ads No Ads

# Distinct restaurant categories present in the data.
unique(x[["restaurantType"]])

## [1] chain       independent
## Levels: chain independent

# Total reservations for every adType x restaurantType combination.
aggregate(x$reservations,
          by = list(x$adType, x$restaurantType),
          FUN = sum)

##    Group.1     Group.2      x
## 1 Curr Ads       chain 160404
## 2  New Ads       chain 192008
## 3   No Ads       chain 159700
## 4 Curr Ads independent 179808
## 5  New Ads independent 224797
## 6   No Ads independent 179904

# Descriptive statistics (n, mean, sd, median, skew, ...) for reservations.
library(psych)
describe(x[["reservations"]])

##    vars     n  mean   sd median trimmed  mad min max range skew kurtosis
## X1    1 30000 36.55 7.99     36   35.97 7.41  15  79    64 0.78     0.88
##      se
## X1 0.05

# Box plot of reservations split by restaurant type.
# The formula terms are kept as written because they become the axis labels.
boxplot(x$reservations ~ x$restaurantType)

library(gplots)

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

# Group means of reservations by restaurant type, with confidence bars.
plotmeans(x$reservations ~ x$restaurantType)

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

# Group means of reservations by advertising condition, with confidence bars.
plotmeans(x$reservations ~ x$adType)

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

# One-way ANOVA: reservations explained by restaurant type.
anov1 <- aov(x$reservations ~ x$restaurantType)
summary(anov1)

##                     Df  Sum Sq Mean Sq F value Pr(>F)    
## x$restaurantType     1  749570  749570   19273 <2e-16 ***
## Residuals        29998 1166677      39                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Normality of dependent variable:
# Anderson-Darling test on reservations within each restaurant type.
library(nortest)
tapply(x$reservations, x$restaurantType, ad.test)

## $chain
## 
##  Anderson-Darling normality test
## 
## data:  X[[i]]
## A = 93.672, p-value < 2.2e-16
## 
## 
## $independent
## 
##  Anderson-Darling normality test
## 
## data:  X[[i]]
## A = 73.776, p-value < 2.2e-16

#Homogeneity of variance
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

# Levene's test of equal reservation variance across restaurant types.
leveneTest(x$reservations, x$restaurantType)

## Levene's Test for Homogeneity of Variance (center = median)
##          Df F value    Pr(>F)    
## group     1  1045.8 < 2.2e-16 ***
##       29998                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Tukey HSD pairwise comparison for the one-way model (95% family-wise level,
# which is the function's default, written out explicitly here).
TukeyHSD(anov1, conf.level = 0.95)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = x$reservations ~ x$restaurantType)
## 
## $`x$restaurantType`
##                        diff       lwr       upr p adj
## independent-chain -10.20328 -10.34733 -10.05923     0

# Plot the Tukey HSD confidence interval(s) for the one-way model.
plot(
  TukeyHSD(anov1)
)

# Add log(reservations) as a new column and refit the one-way ANOVA on it.
x[["logres"]] <- log(x[["reservations"]])
anov2 <- aov(x$logres ~ x$restaurantType)
summary(anov2)

##                     Df Sum Sq Mean Sq F value Pr(>F)    
## x$restaurantType     1  530.9   530.9   19138 <2e-16 ***
## Residuals        29998  832.1     0.0                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Reservation totals per adType x restaurantType cell (same call as earlier).
aggregate(
  x$reservations,
  by = list(x$adType, x$restaurantType),
  FUN = sum
)

##    Group.1     Group.2      x
## 1 Curr Ads       chain 160404
## 2  New Ads       chain 192008
## 3   No Ads       chain 159700
## 4 Curr Ads independent 179808
## 5  New Ads independent 224797
## 6   No Ads independent 179904

# Page-view totals per adType x restaurantType cell.
aggregate(
  x$pageViews,
  by = list(x$adType, x$restaurantType),
  FUN = sum
)

##    Group.1     Group.2       x
## 1 Curr Ads       chain 2761587
## 2  New Ads       chain 2762284
## 3   No Ads       chain 2398285
## 4 Curr Ads independent 2250321
## 5  New Ads independent 2069826
## 6   No Ads independent 1799509

# Phone-call totals per adType x restaurantType cell.
aggregate(
  x$phoneCalls,
  by = list(x$adType, x$restaurantType),
  FUN = sum
)

##    Group.1     Group.2      x
## 1 Curr Ads       chain 176085
## 2  New Ads       chain 192315
## 3   No Ads       chain 160277
## 4 Curr Ads independent 197800
## 5  New Ads independent 224830
## 6   No Ads independent 179919

# Per-cell summary of reservations (count, mean, sd, median, min, max) for
# every restaurantType x adType combination.
library(data.table)
dt <- data.table(x)
# Fix: the sd column previously called mean(), so it duplicated the mean
# column. The table rendered below predates this fix; re-run to refresh it.
dt[, list(Count = .N,
          mean = round(mean(reservations), 3),
          sd = round(sd(reservations), 3),
          median = round(median(reservations), 3),
          min = min(reservations),
          max = max(reservations)),
   by = list(restaurantType, adType)]

##    restaurantType   adType Count   mean     sd median min max
## 1:          chain   No Ads  4000 39.925 39.925     40  20  58
## 2:          chain Curr Ads  4000 40.101 40.101     40  23  59
## 3:          chain  New Ads  4000 48.002 48.002     48  18  79
## 4:    independent   No Ads  6000 29.984 29.984     30  15  48
## 5:    independent Curr Ads  6000 29.968 29.968     30  18  43
## 6:    independent  New Ads  6000 37.466 37.466     37  23  51

# Per-cell summary of phoneCalls (count, mean, sd, median, min, max) for
# every restaurantType x adType combination.
# Fix: the sd column previously called mean(), so it duplicated the mean
# column. The table rendered below predates this fix; re-run to refresh it.
dt[, list(Count = .N,
          mean = round(mean(phoneCalls), 3),
          sd = round(sd(phoneCalls), 3),
          median = round(median(phoneCalls), 3),
          min = min(phoneCalls),
          max = max(phoneCalls)),
   by = list(restaurantType, adType)]

##    restaurantType   adType Count   mean     sd median min max
## 1:          chain   No Ads  4000 40.069 40.069     40  22  58
## 2:          chain Curr Ads  4000 44.021 44.021     44  25  63
## 3:          chain  New Ads  4000 48.079 48.079     48  19  77
## 4:    independent   No Ads  6000 29.986 29.986     30  17  45
## 5:    independent Curr Ads  6000 32.967 32.967     33  20  50
## 6:    independent  New Ads  6000 37.472 37.472     37  22  53

# Per-cell summary of pageViews (count, mean, sd, median, min, max) for
# every restaurantType x adType combination.
# Fix: the sd column previously called mean(), so it duplicated the mean
# column. The table rendered below predates this fix; re-run to refresh it.
dt[, list(Count = .N,
          mean = round(mean(pageViews), 3),
          sd = round(sd(pageViews), 3),
          median = round(median(pageViews), 3),
          min = min(pageViews),
          max = max(pageViews)),
   by = list(restaurantType, adType)]

##    restaurantType   adType Count    mean      sd median min max
## 1:          chain   No Ads  4000 599.571 599.571    599 437 766
## 2:          chain Curr Ads  4000 690.397 690.397    690 444 929
## 3:          chain  New Ads  4000 690.571 690.571    690 440 918
## 4:    independent   No Ads  6000 299.918 299.918    300 145 450
## 5:    independent Curr Ads  6000 375.053 375.053    375 209 530
## 6:    independent  New Ads  6000 344.971 344.971    345 188 483

# Box-and-whisker plots of reservations by ad type, one panel per
# restaurant type.
library(lattice)
bwplot(reservations ~ adType | restaurantType,
       data = x,
       col = "black")

# Interaction plot: mean reservations by restaurant type, one trace per ad type.
# The x$... expressions are kept verbatim because they are used for the labels.
interaction.plot(x.factor = x$restaurantType,
                 trace.factor = x$adType,
                 response = x$reservations,
                 type = "b", lwd = 2,
                 col = 1:3, pch = c(18, 24, 22),
                 leg.bty = "o", leg.bg = "beige")

# Two-way ANOVA with interaction: reservations by ad type and restaurant type.
anov3 <- aov(reservations ~ adType * restaurantType, data = x)
summary(anov3)

##                          Df Sum Sq Mean Sq   F value   Pr(>F)    
## adType                    2 394228  197114  7658.285  < 2e-16 ***
## restaurantType            1 749570  749570 29122.292  < 2e-16 ***
## adType:restaurantType     2    442     221     8.594 0.000186 ***
## Residuals             29994 772006      26                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Reservations differ significantly between at least one pair of restaurant types, between at least one pair of ad types, and between at least one pair of adType-restaurantType combinations (the interaction term is also significant)

# Normality of dependent variable (same check as for the one-way model):
# Anderson-Darling test on reservations, grouped by restaurantType only.
tapply(x$reservations, x$restaurantType, ad.test)

## $chain
## 
##  Anderson-Darling normality test
## 
## data:  X[[i]]
## A = 93.672, p-value < 2.2e-16
## 
## 
## $independent
## 
##  Anderson-Darling normality test
## 
## data:  X[[i]]
## A = 73.776, p-value < 2.2e-16

#Homogeneity of variance
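#leveneTest(x$reservations,interaction(x$restaurantType,x$adType)) # '*' is not defined for factors; interaction() builds the restaurantType-by-adType groups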

#pairwise.t.test(logres, interaction(restaurantType, adType), x, p.adjust.method = "BH", pool.sd = FALSE)

# Two-way ANOVA with interaction on the log-transformed reservations.
anov4 <- aov(logres ~ adType * restaurantType, data = x)
summary(anov4)

##                          Df Sum Sq Mean Sq F value Pr(>F)    
## adType                    2  278.1   139.1  7588.0 <2e-16 ***
## restaurantType            1  530.9   530.9 28967.7 <2e-16 ***
## adType:restaurantType     2    4.3     2.2   118.2 <2e-16 ***
## Residuals             29994  549.7     0.0                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.