library(data.table)
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(nortest)
library(car)
## Loading required package: carData
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
# Read the advertising data set from a fixed local CSV path.
restaurant <- read.csv(
  file = "C:/Users/ALKR/Desktop/Term V/DAM/Dataset/AdvertisingDataV2.csv"
)
# Put the columns on the search path; later calls refer to them by bare name.
attach(restaurant)
# Number of observations per ad type, with a "Sum" margin appended.
addmargins(table(adType))
## adType
## Curr Ads New Ads No Ads Sum
## 10000 10000 10000 30000
# Descriptive statistics of page views for each ad type.
dt <- data.table(restaurant)
dt[, list(Count = .N,
          mean = round(mean(pageViews), 3),
          # Fix: this column used to be computed with mean(), so it merely
          # repeated the mean. The recorded output that follows predates the
          # fix and has to be regenerated.
          sd = round(sd(pageViews), 3),
          median = round(median(pageViews), 3),
          min = min(pageViews),
          max = max(pageViews)),
   by = list(adType)]
## adType Count mean sd median min max
## 1: No Ads 10000 419.779 419.779 339 145 766
## 2: Curr Ads 10000 501.191 501.191 419 209 929
## 3: New Ads 10000 483.211 483.211 384 188 918
# Side-by-side boxplots of page views, one box per ad type.
boxplot(
  pageViews ~ adType,
  data = restaurant,
  xlab = "Ad Type",
  ylab = "Page Views",
  main = "Boxplot of Page Views vs Ad Type"
)
# Plot the mean page views per ad type with interval bars and mean labels.
#
# Two arguments from the earlier version were dropped:
#   * `frame = TRUE` is not a plotmeans() argument. It was forwarded through
#     `...` to the low-level graphics calls, each of which emitted
#     '"frame" is not a graphical parameter' (see the recorded warnings).
#   * `legends = TRUE`: `legends` is documented as a vector of strings used to
#     label the groups on the x axis and defaults to the group names, so the
#     default is used instead of a logical.
# The "zero-length arrow" warnings in the recorded output come from arrows()
# and are not addressed by this change.
plotmeans(pageViews ~ adType, data = restaurant,
          xlab = "Ad Type", ylab = "Page Views",
          digits = 2, col = "black", ccol = "blue", barwidth = 2,
          mean.labels = TRUE)
## Warning in text.default(x, y, label = labels, col = col, ...): "frame" is
## not a graphical parameter
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is
## not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter
# One-way ANOVA of page views on ad type; print the ANOVA table.
oneWayfit <- aov(formula = pageViews ~ adType, data = restaurant)
summary(object = oneWayfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 36582190 18291095 675.9 <2e-16 ***
## Residuals 29997 811743988 27061
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Since the p-value is less than 0.05, we reject the null hypothesis of equal means and conclude that there are significant differences in page views between the different advertising types.
# Anderson-Darling normality test on page views, run separately per ad type.
tapply(restaurant$pageViews, restaurant$adType, ad.test)
## $`Curr Ads`
##
## Anderson-Darling normality test
##
## data: X[[i]]
## A = 589.56, p-value < 2.2e-16
##
##
## $`New Ads`
##
## Anderson-Darling normality test
##
## data: X[[i]]
## A = 739.4, p-value < 2.2e-16
##
##
## $`No Ads`
##
## Anderson-Darling normality test
##
## data: X[[i]]
## A = 671.15, p-value < 2.2e-16
Anderson-Darling test. Null hypothesis (H0): the data are normally distributed. We reject the null hypothesis since the p-value is less than 0.05 for every ad type. Hence, the data are not normally distributed.
# Levene's test for homogeneity of variance of page views across ad types.
leveneTest(pageViews ~ adType, data = restaurant)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 57.346 < 2.2e-16 ***
## 29997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Check for homogeneity of variance. Null hypothesis (H0): the variance of page views is homogeneous across the different ad types. Since the p-value is less than 0.05, we reject the null hypothesis: the variance of page views is heterogeneous across ad types.
# Tukey HSD pairwise comparisons of the ad-type means from the one-way fit.
# NOTE(review): Levene's test above rejected equal variances, while this
# procedure works from the aov fit's pooled residual variance -- confirm that
# this is acceptable for the analysis.
TukeyHSD(oneWayfit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = pageViews ~ adType, data = restaurant)
##
## $adType
## diff lwr upr p adj
## New Ads-Curr Ads -17.9798 -23.4322 -12.5274 0
## No Ads-Curr Ads -81.4114 -86.8638 -75.9590 0
## No Ads-New Ads -63.4316 -68.8840 -57.9792 0
# Plot the Tukey HSD confidence intervals for the one-way fit.
plot(TukeyHSD(oneWayfit))
# Kruskal-Wallis rank sum test of page views by ad type.
kruskal.test(pageViews ~ adType, data = restaurant)
##
## Kruskal-Wallis rank sum test
##
## data: pageViews by adType
## Kruskal-Wallis chi-squared = 2865.7, df = 2, p-value < 2.2e-16
# Estimate a Box-Cox transformation for page views.
# `pageViews` is found by bare name through the earlier attach(restaurant).
pageViewsTrans <- BoxCoxTrans(pageViews)
# Print the fitted transformation (input summary and estimated lambda).
pageViewsTrans
## Box-Cox Transformation
##
## 30000 data points used to estimate Lambda
##
## Input data summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 145.0 328.0 391.0 468.1 636.0 929.0
##
## Largest/Smallest: 6.41
## Sample Skewness: 0.451
##
## Estimated Lambda: -0.3
# Store the Box-Cox-transformed page views as a new column, pageViewsNew.
restaurant$pageViewsNew <- predict(pageViewsTrans, pageViews)
# Attach the widened data frame so pageViewsNew is reachable by bare name;
# the copy attached earlier stays on the search path and is masked.
attach(restaurant)
## The following objects are masked from restaurant (pos = 3):
##
## adType, businessID, pageViews, phoneCalls, reservations,
## restaurantType
# One-way ANOVA of the Box-Cox-transformed page views on ad type.
oneWaytransfit <- aov(formula = pageViewsNew ~ adType, data = restaurant)
summary(object = oneWaytransfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 5.44 2.7183 879.2 <2e-16 ***
## Residuals 29997 92.74 0.0031
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Welch one-way test: an ANOVA test for use when the group variances are not the same.
# One-way test of means on the untransformed page views; with the default
# settings the recorded output reports it as "not assuming equal variances".
oneway.test(pageViews ~ adType, data = restaurant)
##
## One-way analysis of means (not assuming equal variances)
##
## data: pageViews and adType
## F = 734.5, num df = 2, denom df = 19934, p-value < 2.2e-16
# Cross-tabulate ad type by restaurant type, with "Sum" margins on both axes.
# Both columns are found by bare name through attach(restaurant).
addmargins(table(adType, restaurantType))
## restaurantType
## adType chain independent Sum
## Curr Ads 4000 6000 10000
## New Ads 4000 6000 10000
## No Ads 4000 6000 10000
## Sum 12000 18000 30000
PageViews
# Descriptive statistics of page views for each restaurant type / ad type
# combination.
dt <- data.table(restaurant)
dt[, list(Count = .N,
          mean = round(mean(pageViews), 3),
          # Fix: this column used to be computed with mean(), so it merely
          # repeated the mean. The recorded output that follows predates the
          # fix and has to be regenerated.
          sd = round(sd(pageViews), 3),
          median = round(median(pageViews), 3),
          min = min(pageViews),
          max = max(pageViews)),
   by = list(restaurantType, adType)]
## restaurantType adType Count mean sd median min max
## 1: chain No Ads 4000 599.571 599.571 599 437 766
## 2: chain Curr Ads 4000 690.397 690.397 690 444 929
## 3: chain New Ads 4000 690.571 690.571 690 440 918
## 4: independent No Ads 6000 299.918 299.918 300 145 450
## 5: independent Curr Ads 6000 375.053 375.053 375 209 530
## 6: independent New Ads 6000 344.971 344.971 345 188 483
PhoneCalls
# b. Phone Calls
# Descriptive statistics of phone calls for each restaurant type / ad type
# combination.
dt <- data.table(restaurant)
dt[, list(Count = .N,
          mean = round(mean(phoneCalls), 3),
          # Fix: this column used to be computed with mean(), so it merely
          # repeated the mean. The recorded output that follows predates the
          # fix and has to be regenerated.
          sd = round(sd(phoneCalls), 3),
          median = round(median(phoneCalls), 3),
          min = min(phoneCalls),
          max = max(phoneCalls)),
   by = list(restaurantType, adType)]
## restaurantType adType Count mean sd median min max
## 1: chain No Ads 4000 40.069 40.069 40 22 58
## 2: chain Curr Ads 4000 44.021 44.021 44 25 63
## 3: chain New Ads 4000 48.079 48.079 48 19 77
## 4: independent No Ads 6000 29.986 29.986 30 17 45
## 5: independent Curr Ads 6000 32.967 32.967 33 20 50
## 6: independent New Ads 6000 37.472 37.472 37 22 53
# Lattice box-and-whisker plot of page views by ad type, one panel per
# restaurant type.
bwplot(
  pageViews ~ adType | restaurantType,
  data = restaurant,
  col = "black",
  ylab = "Page Views",
  main = "Boxplot of Page Views vs Ad Type and Restaurant Type"
)
# Interaction plot: mean page views per ad type, one trace per restaurant type.
# The three columns are found by bare name through attach(restaurant).
interaction.plot(
  x.factor = adType,
  trace.factor = restaurantType,
  response = pageViews,
  type = "b",
  pch = c(18, 24, 22),
  col = 1:3,
  lwd = 2,
  leg.bty = "o",
  leg.bg = "beige",
  xlab = "Ad Type",
  ylab = "Page Views",
  main = "Interaction Plot of Page Views by Ad Type and Restaurant Type"
)
# Two-way ANOVA of page views on ad type, restaurant type and their
# interaction; print the ANOVA table.
twoWayfit <- aov(
  formula = pageViews ~ adType * restaurantType,
  data = restaurant
)
summary(object = twoWayfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 36582190 18291095 7734.8 <2e-16 ***
## restaurantType 1 738196252 738196252 312161.4 <2e-16 ***
## adType:restaurantType 2 2618217 1309109 553.6 <2e-16 ***
## Residuals 29994 70929518 2365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
All the p-values are less than 0.05. Hence, we reject the null hypotheses. We can conclude that there are significant differences in page views between the ad types as well as between the restaurant types, and that the interaction between ad type and restaurant type is also significant.
H0: The data are normally distributed. Checking for normality in page views using the Anderson-Darling (AD) test.
# Anderson-Darling normality test on all page views pooled across groups.
# `pageViews` is found by bare name through attach(restaurant).
# NOTE(review): the ANOVA normality assumption concerns the model residuals;
# consider testing residuals(twoWayfit) instead of the pooled response.
ad.test(pageViews)
##
## Anderson-Darling normality test
##
## data: pageViews
## A = 1432.3, p-value < 2.2e-16
The normality test is statistically significant, so we reject the null hypothesis. Hence, we cannot assume normality in the data.
# Levene's test for homogeneity of variance of page views across the
# ad type x restaurant type groups.
leveneTest(pageViews ~ adType * restaurantType, data = restaurant)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 315.24 < 2.2e-16 ***
## 29994
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The test is statistically significant, so we reject the null hypothesis. Hence, we cannot assume homogeneity of variance between the groups.
# Tukey HSD pairwise comparisons for both main effects and the interaction
# term of the two-way fit.
TukeyHSD(twoWayfit)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = pageViews ~ adType * restaurantType, data = restaurant)
##
## $adType
## diff lwr upr p adj
## New Ads-Curr Ads -17.9798 -19.59161 -16.36799 0
## No Ads-Curr Ads -81.4114 -83.02321 -79.79959 0
## No Ads-New Ads -63.4316 -65.04341 -61.81979 0
##
## $restaurantType
## diff lwr upr p adj
## independent-chain -320.1988 -321.322 -319.0755 0
##
## $`adType:restaurantType`
## diff lwr
## New Ads:chain-Curr Ads:chain 0.17425 -2.924463
## No Ads:chain-Curr Ads:chain -90.82550 -93.924213
## Curr Ads:independent-Curr Ads:chain -315.34325 -318.171975
## New Ads:independent-Curr Ads:chain -345.42575 -348.254475
## No Ads:independent-Curr Ads:chain -390.47858 -393.307308
## No Ads:chain-New Ads:chain -90.99975 -94.098463
## Curr Ads:independent-New Ads:chain -315.51750 -318.346225
## New Ads:independent-New Ads:chain -345.60000 -348.428725
## No Ads:independent-New Ads:chain -390.65283 -393.481558
## Curr Ads:independent-No Ads:chain -224.51775 -227.346475
## New Ads:independent-No Ads:chain -254.60025 -257.428975
## No Ads:independent-No Ads:chain -299.65308 -302.481808
## New Ads:independent-Curr Ads:independent -30.08250 -32.612588
## No Ads:independent-Curr Ads:independent -75.13533 -77.665422
## No Ads:independent-New Ads:independent -45.05283 -47.582922
## upr p adj
## New Ads:chain-Curr Ads:chain 3.272963 0.9999854
## No Ads:chain-Curr Ads:chain -87.726787 0.0000000
## Curr Ads:independent-Curr Ads:chain -312.514525 0.0000000
## New Ads:independent-Curr Ads:chain -342.597025 0.0000000
## No Ads:independent-Curr Ads:chain -387.649859 0.0000000
## No Ads:chain-New Ads:chain -87.901037 0.0000000
## Curr Ads:independent-New Ads:chain -312.688775 0.0000000
## New Ads:independent-New Ads:chain -342.771275 0.0000000
## No Ads:independent-New Ads:chain -387.824109 0.0000000
## Curr Ads:independent-No Ads:chain -221.689025 0.0000000
## New Ads:independent-No Ads:chain -251.771525 0.0000000
## No Ads:independent-No Ads:chain -296.824359 0.0000000
## New Ads:independent-Curr Ads:independent -27.552412 0.0000000
## No Ads:independent-Curr Ads:independent -72.605245 0.0000000
## No Ads:independent-New Ads:independent -42.522745 0.0000000
# Plot the Tukey HSD confidence intervals for the two-way fit.
plot(TukeyHSD(twoWayfit))
# Pairwise t tests between all ad type x restaurant type cells, using
# non-pooled standard deviations and Benjamini-Hochberg ("BH") p-value
# adjustment.
# pairwise.t.test() has no `data` argument (the old `data = restaurant` was
# silently absorbed by `...`), so the columns are supplied explicitly via
# with() instead of depending on attach().
with(
  restaurant,
  pairwise.t.test(pageViews, interaction(adType, restaurantType),
                  p.adjust.method = "BH", pool.sd = FALSE)
)
##
## Pairwise comparisons using t tests with non-pooled SD
##
## data: pageViews and interaction(adType, restaurantType)
##
## Curr Ads.chain New Ads.chain No Ads.chain
## New Ads.chain 0.9 - -
## No Ads.chain <2e-16 <2e-16 -
## Curr Ads.independent <2e-16 <2e-16 <2e-16
## New Ads.independent <2e-16 <2e-16 <2e-16
## No Ads.independent <2e-16 <2e-16 <2e-16
## Curr Ads.independent New Ads.independent
## New Ads.chain - -
## No Ads.chain - -
## Curr Ads.independent - -
## New Ads.independent <2e-16 -
## No Ads.independent <2e-16 <2e-16
##
## P value adjustment method: BH
# Two-way ANOVA of the Box-Cox-transformed page views on ad type, restaurant
# type and their interaction; print the ANOVA table.
twoWayTransfit <- aov(
  formula = pageViewsNew ~ adType * restaurantType,
  data = restaurant
)
summary(object = twoWayTransfit)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 5.44 2.72 7525 <2e-16 ***
## restaurantType 1 81.43 81.43 225414 <2e-16 ***
## adType:restaurantType 2 0.48 0.24 670 <2e-16 ***
## Residuals 29994 10.83 0.00
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Anderson-Darling normality test on the transformed page views, pooled
# across groups. `pageViewsNew` is only reachable by bare name because the
# widened data frame was attached after the column was added.
ad.test(pageViewsNew)
##
## Anderson-Darling normality test
##
## data: pageViewsNew
## A = 933.12, p-value < 2.2e-16
# Levene's test for homogeneity of variance of the transformed page views
# across the ad type x restaurant type groups.
leveneTest(pageViewsNew ~ adType * restaurantType, data = restaurant)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 5 656.9 < 2.2e-16 ***
## 29994
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1