# Load the advertising data set (relative path: resolved against the
# current working directory)
restaurants <- read.csv(file = "AdvertisingDataV2.csv")
# Expose the data frame's columns on the search path.
# NOTE(review): attach() is generally discouraged, but later statements in
# this script refer to bare column names, so the call is kept here.
attach(restaurants)
# Number of rows and columns
dim(restaurants)
## [1] 30000 6
# Observations per ad type, with the grand total appended
addmargins(A = table(adType))
## adType
## Curr Ads New Ads No Ads Sum
## 10000 10000 10000 30000
library(psych)
# Descriptive statistics for every column of the data frame; reservations
# is the key dependent variable in the analyses that follow
psych::describe(restaurants)
## vars n mean sd median trimmed mad min
## adType* 1 30000 2.00 0.82 2.0 2.00 1.48 1
## pageViews 2 30000 468.06 168.16 391.0 458.18 149.74 145
## phoneCalls 3 30000 37.71 7.97 37.0 37.20 7.41 17
## reservations 4 30000 36.55 7.99 36.0 35.97 7.41 15
## businessID 5 30000 15000.50 8660.40 15000.5 15000.50 11119.50 1
## restaurantType* 6 30000 1.60 0.49 2.0 1.62 0.00 1
## max range skew kurtosis se
## adType* 3 2 0.00 -1.50 0.00
## pageViews 929 784 0.45 -1.29 0.97
## phoneCalls 77 60 0.65 0.57 0.05
## reservations 79 64 0.78 0.88 0.05
## businessID 30000 29999 0.00 -1.20 50.00
## restaurantType* 2 1 -0.41 -1.83 0.00
library(data.table)
# Reservation summary statistics (count, mean, sd, median, range) per ad type
dt <- data.table(restaurants)
dt[
  ,
  .(
    count = .N,
    mean = round(mean(reservations), digits = 3),
    sd = round(sd(reservations), digits = 3),
    median = round(median(reservations), digits = 3),
    min = min(reservations),
    max = max(reservations)
  ),
  by = .(adType)
]
## adType count mean sd median min max
## 1: No Ads 10000 33.960 6.591 33 15 58
## 2: Curr Ads 10000 34.021 6.504 33 18 59
## 3: New Ads 10000 41.681 8.153 40 18 79
# Distribution of reservations within each ad type
boxplot(
  reservations ~ adType,
  data = restaurants,
  xlab = "Ad Type",
  ylab = "Reservations",
  main = "Boxplot of Ad Type",
  digits = 2
)
# Mean plot: average number of reservations for each ad type
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
plotmeans(
  reservations ~ adType,
  data = restaurants,
  xlab = "Ad Type",
  ylab = "Number of Reservations",
  col = "black",
  ccol = "blue",
  barwidth = 2,
  digits = 2,
  legends = TRUE,
  mean.labels = TRUE,
  n.label = FALSE,
  frame = TRUE
)
# Fit a one-way ANOVA of reservations on ad type
oneWayfit <- aov(
  formula = reservations ~ adType,
  data = restaurants
)
# Print the ANOVA table for the fitted model
summary(object = oneWayfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 394228 197114 3885 <2e-16 ***
## Residuals 29997 1522018 51
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Anderson-Darling normality test on the raw response.
# The column is looked up inside the data frame with with() instead of
# through the attach()ed search path, so the test always sees the current
# contents of `restaurants`; the reported data name stays "reservations".
# NOTE(review): ANOVA assumes normally distributed residuals rather than a
# normal marginal response; consider ad.test(residuals(oneWayfit)) here --
# TODO confirm the intended check.
library(nortest)
with(restaurants, ad.test(reservations))
##
## Anderson-Darling normality test
##
## data: reservations
## A = 222.08, p-value < 2.2e-16
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Levene's test: are the reservation variances equal across ad types?
leveneTest(
  reservations ~ adType,
  data = restaurants
)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 134.29 < 2.2e-16 ***
## 29997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD post-hoc comparisons between ad types; the result is stored once
# and reused for both the printed table and the plot
tukey_one_way <- TukeyHSD(oneWayfit)
tukey_one_way
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = reservations ~ adType, data = restaurants)
##
## $adType
## diff lwr upr p adj
## New Ads-Curr Ads 7.6593 7.4232043 7.8953957 0.0000000
## No Ads-Curr Ads -0.0608 -0.2968957 0.1752957 0.8181708
## No Ads-New Ads -7.7201 -7.9561957 -7.4840043 0.0000000
# Plot the family-wise confidence intervals of the pairwise differences
plot(tukey_one_way)
# Estimate a Box-Cox transformation for the reservations column
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
restaurants_trans <- BoxCoxTrans(restaurants[["reservations"]])
# Show the estimated lambda and the input data summary
print(restaurants_trans)
## Box-Cox Transformation
##
## 30000 data points used to estimate Lambda
##
## Input data summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.00 31.00 36.00 36.55 41.00 79.00
##
## Largest/Smallest: 5.27
## Sample Skewness: 0.777
##
## Estimated Lambda: -0.2
## With fudge factor, Lambda = 0 will be used for transformations
# Log transformation of reservations (the Box-Cox estimate above resolves to
# lambda = 0, i.e. a log transform)
restaurants$log_reservations <- log(restaurants$reservations)
# Checking for normality of the transformed response.
# The new column is read straight from the data frame with with(), so the
# data frame is not attach()ed a second time; re-attaching only stacked
# another masking copy of every column on the search path.
with(restaurants, ad.test(log_reservations))
##
## Anderson-Darling normality test
##
## data: log_reservations
## A = 36.301, p-value < 2.2e-16
# Levene's test for homogeneity of variances on the log scale
library(car)
leveneTest(
  log_reservations ~ adType,
  data = restaurants
)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 24.465 2.419e-11 ***
## 29997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fit a one-way ANOVA of log(reservations) on ad type
oneWayfit_transformed <- aov(
  formula = log_reservations ~ adType,
  data = restaurants
)
# Print the ANOVA table for the log-scale model
summary(object = oneWayfit_transformed)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 278.1 139.05 3845 <2e-16 ***
## Residuals 29997 1084.8 0.04
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Kruskal-Wallis rank sum test: rank-based comparison of reservations
# across ad types
kruskal.test(
  reservations ~ adType,
  data = restaurants
)
##
## Kruskal-Wallis rank sum test
##
## data: reservations by adType
## Kruskal-Wallis chi-squared = 5856.4, df = 2, p-value < 2.2e-16
# One-way test of means that does not assume equal group variances
oneway.test(
  formula = log_reservations ~ adType,
  data = restaurants,
  var.equal = FALSE
)
##
## One-way analysis of means (not assuming equal variances)
##
## data: log_reservations and adType
## F = 3892.4, num df = 2, denom df = 19993, p-value < 2.2e-16
# Cross-tabulation of ad type by restaurant type, with row and column totals.
# The columns are resolved inside the data frame with with() rather than by
# attaching it a third time, which only piled another masking copy of every
# column onto the search path.
addmargins(with(restaurants, table(adType, restaurantType)))
## restaurantType
## adType chain independent Sum
## Curr Ads 4000 6000 10000
## New Ads 4000 6000 10000
## No Ads 4000 6000 10000
## Sum 12000 18000 30000
library(psych)
# Descriptive statistics for every column, now including log_reservations;
# reservations remains the key dependent variable
psych::describe(restaurants)
## vars n mean sd median trimmed mad
## adType* 1 30000 2.00 0.82 2.00 2.00 1.48
## pageViews 2 30000 468.06 168.16 391.00 458.18 149.74
## phoneCalls 3 30000 37.71 7.97 37.00 37.20 7.41
## reservations 4 30000 36.55 7.99 36.00 35.97 7.41
## businessID 5 30000 15000.50 8660.40 15000.50 15000.50 11119.50
## restaurantType* 6 30000 1.60 0.49 2.00 1.62 0.00
## log_reservations 7 30000 3.58 0.21 3.58 3.57 0.22
## min max range skew kurtosis se
## adType* 1.00 3.00 2.00 0.00 -1.50 0.00
## pageViews 145.00 929.00 784.00 0.45 -1.29 0.97
## phoneCalls 17.00 77.00 60.00 0.65 0.57 0.05
## reservations 15.00 79.00 64.00 0.78 0.88 0.05
## businessID 1.00 30000.00 29999.00 0.00 -1.20 50.00
## restaurantType* 1.00 2.00 1.00 -0.41 -1.83 0.00
## log_reservations 2.71 4.37 1.66 0.14 -0.07 0.00
library(data.table)
# Rebuild the data.table so it reflects the current data frame
dt <- data.table(restaurants)
# Reservation summary statistics per ad type
dt[
  ,
  .(
    count = .N,
    mean = round(mean(reservations), digits = 3),
    sd = round(sd(reservations), digits = 3),
    median = round(median(reservations), digits = 3),
    min = min(reservations),
    max = max(reservations)
  ),
  by = .(adType)
]
## adType count mean sd median min max
## 1: No Ads 10000 33.960 6.591 33 15 58
## 2: Curr Ads 10000 34.021 6.504 33 18 59
## 3: New Ads 10000 41.681 8.153 40 18 79
# Reservation summary statistics per restaurant type
dt[
  ,
  .(
    count = .N,
    mean = round(mean(reservations), digits = 3),
    sd = round(sd(reservations), digits = 3),
    median = round(median(reservations), digits = 3),
    min = min(reservations),
    max = max(reservations)
  ),
  by = .(restaurantType)
]
## restaurantType count mean sd median min max
## 1: chain 12000 42.676 7.464 42 18 79
## 2: independent 18000 32.473 5.261 32 15 51
# Box plot of reservations by ad type
boxplot(reservations ~ adType, data = restaurants,
        main = "Boxplot of Ad Type",
        xlab = "Ad Type", ylab = "Reservations",
        digits = 2)
# Box plot of reservations by restaurant type.
# The title previously read "Boxplot of Ad Type", which mislabelled this
# plot: the grouping factor here is restaurantType.
boxplot(reservations ~ restaurantType, data = restaurants,
        main = "Boxplot of Restaurant Type",
        xlab = "Restaurant Type", ylab = "Reservations",
        digits = 2)
# Box plot of reservations by ad type, one panel per restaurant type
library(lattice)
bwplot(
  reservations ~ adType | restaurantType,
  data = restaurants,
  main = "Boxplot of Reservations by Ad Type and Restaurant Type",
  ylab = "Number of Reservations",
  col = "black"
)
# Mean plot of reservations by restaurant type
library(gplots)
plotmeans(
  reservations ~ restaurantType,
  data = restaurants,
  xlab = "Restaurant Type",
  ylab = "Number of Reservations",
  digits = 2,
  col = "black",
  ccol = "blue",
  barwidth = 2,
  legends = TRUE,
  mean.labels = TRUE,
  n.label = FALSE,
  frame = TRUE
)
# Interaction plot: ad type on the x-axis, one trace per restaurant type.
# The x-axis label now names the factor actually plotted there (adType); it
# previously read "Restaurant type". The columns are resolved with with()
# so the plot does not depend on the data frame being attach()ed.
with(
  restaurants,
  interaction.plot(
    adType, restaurantType, reservations,
    type = "b",
    col = c(1:3),
    leg.bty = "o",
    leg.bg = "beige",
    lwd = 2,
    pch = c(18, 24, 22),
    xlab = "Ad Type",
    ylab = "Number of Reservations",
    main = "Interaction Plot of Ad Type and Restaurant Type"
  )
)
# Fit a two-way ANOVA: both main effects plus their interaction
twoWayfit <- aov(
  formula = reservations ~ adType * restaurantType,
  data = restaurants
)
# Print the ANOVA table for the two-way model
summary(object = twoWayfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 394228 197114 7658.285 < 2e-16 ***
## restaurantType 1 749570 749570 29122.292 < 2e-16 ***
## adType:restaurantType 2 442 221 8.594 0.000186 ***
## Residuals 29994 772006 26
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostic plot 2 of the two-way fit (normal Q-Q plot of the residuals)
plot(twoWayfit, which = 2)
# Anderson-Darling normality test on the raw response.
# The column is looked up inside the data frame with with() instead of
# through the attach()ed search path; the reported data name is unchanged.
# NOTE(review): this tests the marginal response, not the model residuals
# shown in the Q-Q plot; consider ad.test(residuals(twoWayfit)) -- TODO
# confirm the intended check.
library(nortest)
with(restaurants, ad.test(reservations))
##
## Anderson-Darling normality test
##
## data: reservations
## A = 222.08, p-value < 2.2e-16
# Diagnostic plot 1 of the two-way fit (residuals versus fitted values)
plot(twoWayfit, which = 1)
library(car)
# Levene's test for equal variances across the ad type x restaurant type cells
leveneTest(
  reservations ~ adType * restaurantType,
  data = restaurants
)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 974.04 < 2.2e-16 ***
## 29994
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fit the two-way ANOVA (main effects plus interaction) on the log scale
twoWayfit_transformed <- aov(
  formula = log_reservations ~ adType * restaurantType,
  data = restaurants
)
# Print the ANOVA table for the log-scale two-way model
summary(object = twoWayfit_transformed)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 278.1 139.1 7588.0 <2e-16 ***
## restaurantType 1 530.9 530.9 28967.7 <2e-16 ***
## adType:restaurantType 2 4.3 2.2 118.2 <2e-16 ***
## Residuals 29994 549.7 0.0
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD post-hoc comparisons for every term of the log-scale two-way model
TukeyHSD(x = twoWayfit_transformed)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = log_reservations ~ adType * restaurantType, data = restaurants)
##
## $adType
## diff lwr upr p adj
## New Ads-Curr Ads 0.202877282 0.198390382 0.207364182 0.0000000
## No Ads-Curr Ads -0.002709695 -0.007196595 0.001777205 0.3328927
## No Ads-New Ads -0.205586978 -0.210073878 -0.201100078 0.0000000
##
## $restaurantType
## diff lwr upr p adj
## independent-chain -0.2715314 -0.2746583 -0.2684046 0
##
## $`adType:restaurantType`
## diff lwr
## New Ads:chain-Curr Ads:chain 0.170776091 0.162149996
## No Ads:chain-Curr Ads:chain -0.004558874 -0.013184969
## Curr Ads:independent-Curr Ads:chain -0.290392755 -0.298267266
## New Ads:independent-Curr Ads:chain -0.066114678 -0.073989190
## No Ads:independent-Curr Ads:chain -0.291869665 -0.299744176
## No Ads:chain-New Ads:chain -0.175334964 -0.183961060
## Curr Ads:independent-New Ads:chain -0.461168846 -0.469043357
## New Ads:independent-New Ads:chain -0.236890769 -0.244765280
## No Ads:independent-New Ads:chain -0.462645755 -0.470520267
## Curr Ads:independent-No Ads:chain -0.285833881 -0.293708393
## New Ads:independent-No Ads:chain -0.061555804 -0.069430316
## No Ads:independent-No Ads:chain -0.287310791 -0.295185302
## New Ads:independent-Curr Ads:independent 0.224278077 0.217234900
## No Ads:independent-Curr Ads:independent -0.001476910 -0.008520087
## No Ads:independent-New Ads:independent -0.225754986 -0.232798164
## upr p adj
## New Ads:chain-Curr Ads:chain 0.179402186 0.0000000
## No Ads:chain-Curr Ads:chain 0.004067221 0.6603760
## Curr Ads:independent-Curr Ads:chain -0.282518243 0.0000000
## New Ads:independent-Curr Ads:chain -0.058240167 0.0000000
## No Ads:independent-Curr Ads:chain -0.283995153 0.0000000
## No Ads:chain-New Ads:chain -0.166708869 0.0000000
## Curr Ads:independent-New Ads:chain -0.453294334 0.0000000
## New Ads:independent-New Ads:chain -0.229016257 0.0000000
## No Ads:independent-New Ads:chain -0.454771244 0.0000000
## Curr Ads:independent-No Ads:chain -0.277959370 0.0000000
## New Ads:independent-No Ads:chain -0.053681293 0.0000000
## No Ads:independent-No Ads:chain -0.279436279 0.0000000
## New Ads:independent-Curr Ads:independent 0.231321254 0.0000000
## No Ads:independent-Curr Ads:independent 0.005566268 0.9912292
## No Ads:independent-New Ads:independent -0.218711809 0.0000000
# Tukey pairwise-comparison plots, one per model term (selected by position).
# Term 1: differences in mean levels of adType
plot(TukeyHSD(twoWayfit_transformed, which = 1))
# Term 2: differences in mean levels of restaurantType
plot(TukeyHSD(twoWayfit_transformed, which = 2))
# Term 3: differences in mean levels of the adType:restaurantType cells
plot(TukeyHSD(twoWayfit_transformed, which = 3))
# Kruskal-Wallis rank sum test across the six ad type x restaurant type cells
kruskal.test(
  reservations ~ interaction(adType, restaurantType),
  data = restaurants
)
##
## Kruskal-Wallis rank sum test
##
## data: reservations by interaction(adType, restaurantType)
## Kruskal-Wallis chi-squared = 18667, df = 5, p-value < 2.2e-16
# One-way test of means over the same cells, not assuming equal variances
oneway.test(
  formula = log_reservations ~ interaction(adType, restaurantType),
  data = restaurants,
  var.equal = FALSE
)
##
## One-way analysis of means (not assuming equal variances)
##
## data: log_reservations and interaction(adType, restaurantType)
## F = 7794, num df = 5, denom df = 12999, p-value < 2.2e-16