# Load the advertising experiment data.
# stringsAsFactors = TRUE is spelled out because read.csv() defaults to
# character columns from R 4.0.0 onwards; the "Levels:" lines in the rendered
# output of unique() further down show adType and restaurantType were factors.
x <- read.csv("AdvertisingDataV2.csv", stringsAsFactors = TRUE)
# Preview the first rows to confirm the column layout.
head(x)
## adType pageViews phoneCalls reservations businessID restaurantType
## 1 No Ads 643 44 39 1 chain
## 2 No Ads 621 41 44 2 chain
## 3 No Ads 581 40 38 3 chain
## 4 No Ads 592 35 31 4 chain
## 5 No Ads 648 45 46 5 chain
## 6 No Ads 519 37 41 6 chain
# Distinct advertising conditions present in the data.
unique(x[["adType"]])
## [1] No Ads Curr Ads New Ads
## Levels: Curr Ads New Ads No Ads
# Distinct restaurant categories present in the data.
unique(x[["restaurantType"]])
## [1] chain independent
## Levels: chain independent
# Total reservations for every adType x restaurantType cell.
aggregate(
  x[["reservations"]],
  by = list(x[["adType"]], x[["restaurantType"]]),
  FUN = sum
)
## Group.1 Group.2 x
## 1 Curr Ads chain 160404
## 2 New Ads chain 192008
## 3 No Ads chain 159700
## 4 Curr Ads independent 179808
## 5 New Ads independent 224797
## 6 No Ads independent 179904
library(psych)
# Overall descriptive statistics for the reservations column.
describe(x$reservations)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 30000 36.55 7.99 36 35.97 7.41 15 79 64 0.78 0.88
## se
## X1 0.05
# Box plot of reservations split by restaurant type. The x$ prefixes stay in
# the formula because the axis labels are taken from the formula terms.
boxplot(x$reservations ~ x$restaurantType)
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Group means of reservations by restaurant type. The warnings below come
# from arrows() skipping zero-length interval bars.
plotmeans(x$reservations ~ x$restaurantType)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
# Same plot, grouped by advertising condition instead.
plotmeans(x$reservations ~ x$adType)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
# One-way ANOVA of reservations on restaurant type. The formula keeps the x$
# prefixes so the term label in the printed tables stays "x$restaurantType".
anov1 <- aov(x$reservations ~ x$restaurantType)
summary(anov1)
## Df Sum Sq Mean Sq F value Pr(>F)
## x$restaurantType 1 749570 749570 19273 <2e-16 ***
## Residuals 29998 1166677 39
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Normality of the dependent variable: Anderson-Darling test run separately
# on the reservations of each restaurant type.
library(nortest)
with(x, tapply(reservations, INDEX = restaurantType, FUN = ad.test))
## $chain
##
## Anderson-Darling normality test
##
## data: X[[i]]
## A = 93.672, p-value < 2.2e-16
##
##
## $independent
##
## Anderson-Darling normality test
##
## data: X[[i]]
## A = 73.776, p-value < 2.2e-16
# Homogeneity of variance: Levene's test of reservations across restaurant
# types.
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
leveneTest(x$reservations, group = x$restaurantType)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 1045.8 < 2.2e-16 ***
## 29998
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD comparisons for the one-way fit: computed once, then printed and
# plotted from the same object.
anov1_tukey <- TukeyHSD(anov1)
anov1_tukey
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = x$reservations ~ x$restaurantType)
##
## $`x$restaurantType`
## diff lwr upr p adj
## independent-chain -10.20328 -10.34733 -10.05923 0
plot(anov1_tukey)
# Add a log-transformed copy of reservations and refit the one-way ANOVA on
# it.
x[["logres"]] <- log(x[["reservations"]])
anov2 <- aov(x$logres ~ x$restaurantType)
summary(anov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## x$restaurantType 1 530.9 530.9 19138 <2e-16 ***
## Residuals 29998 832.1 0.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Totals per adType x restaurantType cell for each outcome column.
# Reservations:
aggregate(
  x[["reservations"]],
  by = list(x[["adType"]], x[["restaurantType"]]),
  FUN = sum
)
## Group.1 Group.2 x
## 1 Curr Ads chain 160404
## 2 New Ads chain 192008
## 3 No Ads chain 159700
## 4 Curr Ads independent 179808
## 5 New Ads independent 224797
## 6 No Ads independent 179904
# Page views:
aggregate(
  x[["pageViews"]],
  by = list(x[["adType"]], x[["restaurantType"]]),
  FUN = sum
)
## Group.1 Group.2 x
## 1 Curr Ads chain 2761587
## 2 New Ads chain 2762284
## 3 No Ads chain 2398285
## 4 Curr Ads independent 2250321
## 5 New Ads independent 2069826
## 6 No Ads independent 1799509
# Phone calls:
aggregate(
  x[["phoneCalls"]],
  by = list(x[["adType"]], x[["restaurantType"]]),
  FUN = sum
)
## Group.1 Group.2 x
## 1 Curr Ads chain 176085
## 2 New Ads chain 192315
## 3 No Ads chain 160277
## 4 Curr Ads independent 197800
## 5 New Ads independent 224830
## 6 No Ads independent 179919
library(data.table)
# Build a data.table from x so grouped summaries can use dt[i, j, by].
dt <- data.table(x)
# Per-cell summary of reservations: count, mean, standard deviation, median
# and range for each restaurantType x adType combination.
# Fix: the sd column previously called mean(), so it duplicated the mean
# column; it now calls sd(). The rendered table that follows predates this
# fix and must be regenerated to show the real standard deviations.
dt[, list(Count = .N,
          mean = round(mean(reservations), 3),
          sd = round(sd(reservations), 3),
          median = round(median(reservations), 3),
          min = min(reservations),
          max = max(reservations)),
   by = list(restaurantType, adType)]
## restaurantType adType Count mean sd median min max
## 1: chain No Ads 4000 39.925 39.925 40 20 58
## 2: chain Curr Ads 4000 40.101 40.101 40 23 59
## 3: chain New Ads 4000 48.002 48.002 48 18 79
## 4: independent No Ads 6000 29.984 29.984 30 15 48
## 5: independent Curr Ads 6000 29.968 29.968 30 18 43
## 6: independent New Ads 6000 37.466 37.466 37 23 51
# Per-cell summary of phoneCalls: count, mean, standard deviation, median
# and range for each restaurantType x adType combination.
# Fix: the sd column previously called mean(), so it duplicated the mean
# column; it now calls sd(). The rendered table that follows predates this
# fix and must be regenerated to show the real standard deviations.
dt[, list(Count = .N,
          mean = round(mean(phoneCalls), 3),
          sd = round(sd(phoneCalls), 3),
          median = round(median(phoneCalls), 3),
          min = min(phoneCalls),
          max = max(phoneCalls)),
   by = list(restaurantType, adType)]
## restaurantType adType Count mean sd median min max
## 1: chain No Ads 4000 40.069 40.069 40 22 58
## 2: chain Curr Ads 4000 44.021 44.021 44 25 63
## 3: chain New Ads 4000 48.079 48.079 48 19 77
## 4: independent No Ads 6000 29.986 29.986 30 17 45
## 5: independent Curr Ads 6000 32.967 32.967 33 20 50
## 6: independent New Ads 6000 37.472 37.472 37 22 53
# Per-cell summary of pageViews: count, mean, standard deviation, median
# and range for each restaurantType x adType combination.
# Fix: the sd column previously called mean(), so it duplicated the mean
# column; it now calls sd(). The rendered table that follows predates this
# fix and must be regenerated to show the real standard deviations.
dt[, list(Count = .N,
          mean = round(mean(pageViews), 3),
          sd = round(sd(pageViews), 3),
          median = round(median(pageViews), 3),
          min = min(pageViews),
          max = max(pageViews)),
   by = list(restaurantType, adType)]
## restaurantType adType Count mean sd median min max
## 1: chain No Ads 4000 599.571 599.571 599 437 766
## 2: chain Curr Ads 4000 690.397 690.397 690 444 929
## 3: chain New Ads 4000 690.571 690.571 690 440 918
## 4: independent No Ads 6000 299.918 299.918 300 145 450
## 5: independent Curr Ads 6000 375.053 375.053 375 209 530
## 6: independent New Ads 6000 344.971 344.971 345 188 483
library(lattice)
# Box-and-whisker plot of reservations by ad type, one panel per restaurant
# type.
bwplot(
  reservations ~ adType | restaurantType,
  data = x,
  col = "black"
)
# Interaction plot: restaurant type on the x axis, one trace per ad type.
# The x$ expressions are passed as written because the axis and legend labels
# are derived from the argument expressions.
interaction.plot(
  x$restaurantType, x$adType, x$reservations,
  type = "b",
  col = 1:3,
  leg.bty = "o",
  leg.bg = "beige",
  lwd = 2,
  pch = c(18, 24, 22)
)
# Two-way ANOVA of reservations on ad type, restaurant type and their
# interaction.
anov3 <- aov(reservations ~ adType * restaurantType, data = x)
summary(anov3)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 394228 197114 7658.285 < 2e-16 ***
## restaurantType 1 749570 749570 29122.292 < 2e-16 ***
## adType:restaurantType 2 442 221 8.594 0.000186 ***
## Residuals 29994 772006 26
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interpretation: reservations differ significantly across ad types and
# across restaurant types, and the adType:restaurantType interaction is also
# significant, i.e. at least one pair of levels (or of adType-restaurantType
# combinations) differs in each case.
# Normality of the dependent variable: Anderson-Darling test within each
# restaurant type.
# NOTE(review): this repeats the grouping used for the one-way model and does
# not split by adType; confirm whether a per-cell or residual-based check was
# intended for the two-way model.
with(x, tapply(reservations, INDEX = restaurantType, FUN = ad.test))
## $chain
##
## Anderson-Darling normality test
##
## data: X[[i]]
## A = 93.672, p-value < 2.2e-16
##
##
## $independent
##
## Anderson-Darling normality test
##
## data: X[[i]]
## A = 73.776, p-value < 2.2e-16
# Homogeneity of variance (left disabled).
# NOTE(review): `*` is not defined for two factors, which is presumably why
# the call below is commented out; leveneTest() also has a formula interface
# (reservations ~ adType * restaurantType, data = x) - confirm before enabling.
#leveneTest(x$reservations,x$restaurantType*x$adType)
#pairwise.t.test(logres, interaction(restaurantType, adType), x, p.adjust.method = "BH", pool.sd = FALSE)
# Two-way ANOVA refit on the log-transformed reservations.
anov4 <- aov(logres ~ adType * restaurantType, data = x)
summary(anov4)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 278.1 139.1 7588.0 <2e-16 ***
## restaurantType 1 530.9 530.9 28967.7 <2e-16 ***
## adType:restaurantType 2 4.3 2.2 118.2 <2e-16 ***
## Residuals 29994 549.7 0.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.