data.df = read.csv(paste("AdvertisingDataV2.csv", sep=""))
addmargins(table(data.df$adType))
##
## Curr Ads New Ads No Ads Sum
## 10000 10000 10000 30000
library(data.table)
dt <- data.table(data.df)
dt[, list(Count = .N,
mean = round(mean(pageViews), 3),
sd = round(mean(pageViews), 3),
median = round(median(pageViews), 3),
min = min(pageViews),
max = max(pageViews)),
by = list(adType)]
## adType Count mean sd median min max
## 1: No Ads 10000 419.779 419.779 339 145 766
## 2: Curr Ads 10000 501.191 501.191 419 209 929
## 3: New Ads 10000 483.211 483.211 384 188 918
# box plot of organ
boxplot(pageViews ~ adType, data = data.df,
main = "Boxplot of adType",
xlab = "adType", ylab = "pageViews")
# mean plot
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
plotmeans(pageViews ~ adType, data = data.df,
xlab = "adType", ylab = "pageViews",
digits=2, col = "black", ccol = "blue", barwidth = 2,
legends = TRUE, mean.labels = TRUE, frame = TRUE)
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
oneWayfit <- aov(pageViews ~ adType, data = data.df)
# summary of the ANOVA model
summary(oneWayfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 36582190 18291095 675.9 <2e-16 ***
## Residuals 29997 811743988 27061
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# check for normality in each group
library(nortest)
ad.test(data.df$pageViews)
##
## Anderson-Darling normality test
##
## data: data.df$pageViews
## A = 1432.3, p-value < 2.2e-16
# We use Anderson-Darling normality test since, sample size is >5000
# with(data.df, tapply(pageViews , adType, shapiro.test))
# Check for homogeneity of variance
library(car)
## Loading required package: carData
leveneTest(pageViews ~ adType, data = data.df)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 57.346 < 2.2e-16 ***
## 29997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We reject the null hypothesis. There is heterogeneity in variance of pageViews
# Check for homogeneity of variance
library(car)
leveneTest(data.df$pageViews , data.df$adType, data = data.df)
## Levene's Test for Homogeneity of Variance (center = median: data.df)
## Df F value Pr(>F)
## group 2 57.346 < 2.2e-16 ***
## 29997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey comparison test
TukeyHSD(oneWayfit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = pageViews ~ adType, data = data.df)
##
## $adType
## diff lwr upr p adj
## New Ads-Curr Ads -17.9798 -23.4322 -12.5274 0
## No Ads-Curr Ads -81.4114 -86.8638 -75.9590 0
## No Ads-New Ads -63.4316 -68.8840 -57.9792 0
# Tukey pair-wise comparisons plot
plot(TukeyHSD(oneWayfit))
# Kruskal-Wallis rank sum test
kruskal.test(pageViews ~ adType, data = data.df)
##
## Kruskal-Wallis rank sum test
##
## data: pageViews by adType
## Kruskal-Wallis chi-squared = 2865.7, df = 2, p-value < 2.2e-16
We use, Kruskal-Wallis Rank Sum Test. This test can be used when the normality assumption is violated.
# anova test when variances are not same
oneway.test(pageViews ~ adType, data = data.df)
##
## One-way analysis of means (not assuming equal variances)
##
## data: pageViews and adType
## F = 734.5, num df = 2, denom df = 19934, p-value < 2.2e-16
We use Welch One-way Test. This test is used when variances are heterogeneous.
addmargins(table(data.df$adType, data.df$restaurantType))
##
## chain independent Sum
## Curr Ads 4000 6000 10000
## New Ads 4000 6000 10000
## No Ads 4000 6000 10000
## Sum 12000 18000 30000
library(data.table)
dt <- data.table(data.df)
dt[, list(Count = .N,
mean = round(mean(pageViews), 3),
sd = round(mean(pageViews), 3),
median = round(median(pageViews), 3),
min = min(pageViews),
max = max(pageViews)),
by = list(adType)]
## adType Count mean sd median min max
## 1: No Ads 10000 419.779 419.779 339 145 766
## 2: Curr Ads 10000 501.191 501.191 419 209 929
## 3: New Ads 10000 483.211 483.211 384 188 918
dt <- data.table(data.df)
dt[, list(Count = .N,
mean = round(mean(pageViews), 3),
sd = round(mean(pageViews), 3),
median = round(median(pageViews), 3),
min = min(pageViews),
max = max(pageViews)),
by = list(restaurantType)]
## restaurantType Count mean sd median min max
## 1: chain 12000 660.180 660.180 660 437 929
## 2: independent 18000 339.981 339.981 339 145 530
# box plot of adType and restaurantType
library(lattice)
bwplot(pageViews ~ adType | restaurantType, data = data.df,
main = "Boxplot of adType and restaurantType",
ylab = "Number of pageViews",
col = "black")
library(gplots)
plotmeans(pageViews ~ adType, data = data.df,
xlab = "adType", ylab = "pageViews",
digits=2, col = "black", ccol = "blue", barwidth = 2,
legends = TRUE, mean.labels = TRUE, frame = TRUE)
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
plotmeans(pageViews ~ restaurantType, data = data.df,
xlab = "adType", ylab = "pageViews",
digits=2, col = "black", ccol = "blue", barwidth = 2,
legends = TRUE, mean.labels = TRUE, frame = TRUE)
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
# two-way ANOVA
twoWayfit <- aov(pageViews ~ adType * restaurantType, data = data.df)
# summary of the ANOVA model
summary(twoWayfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 36582190 18291095 7734.8 <2e-16 ***
## restaurantType 1 738196252 738196252 312161.4 <2e-16 ***
## adType:restaurantType 2 2618217 1309109 553.6 <2e-16 ***
## Residuals 29994 70929518 2365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# normal Q-Q plot
plot(twoWayfit, 2)
# Anderson-Darling normality test
library(nortest)
ad.test(data.df$pageViews)
##
## Anderson-Darling normality test
##
## data: data.df$pageViews
## A = 1432.3, p-value < 2.2e-16
# residual versus fitted plot
plot(twoWayfit, 1)
# Levene test for homogeneity of variance
library(car)
leveneTest(pageViews ~ adType * restaurantType, data = data.df)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 315.24 < 2.2e-16 ***
## 29994
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Kruskal-Wallis rank sum test
kruskal.test(pageViews ~ interaction(adType, restaurantType), data = data.df)
##
## Kruskal-Wallis rank sum test
##
## data: pageViews by interaction(adType, restaurantType)
## Kruskal-Wallis chi-squared = 24655, df = 5, p-value < 2.2e-16