# Count how many rows fall under each ad type.
table(adType = data.df$adType)
## adType
## Curr Ads New Ads No Ads
## 10000 10000 10000
library(psych)
# Descriptive statistics for every column, keeping only the location and
# spread columns shown below (selected by name rather than by position).
describe(data.df)[, c("vars", "n", "mean", "sd", "median",
                      "min", "max", "range")]
## vars n mean sd median min max range
## adType* 1 30000 2.00 0.82 2.0 1 3 2
## pageViews 2 30000 468.06 168.16 391.0 145 929 784
## phoneCalls 3 30000 37.71 7.97 37.0 17 77 60
## reservations 4 30000 36.55 7.99 36.0 15 79 64
## businessID 5 30000 15000.50 8660.40 15000.5 1 30000 29999
## restaurantType* 6 30000 1.60 0.49 2.0 1 2 1
# Box plot of reservations grouped by ad type.
# `digits` is not a boxplot() argument, so it is not passed here.
boxplot(reservations ~ adType, data = data.df,
        main = "Boxplot of Ad Type",
        xlab = "Ad Type", ylab = "Reservations")
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Plot the mean number of reservations per ad type with plotmeans()'s
# interval bars. `legends` is left at its default (the group names) so the
# x-axis is labelled with the ad types; passing `legends = TRUE` would be
# forwarded to axis() and produce numeric tick labels instead. `frame` is
# neither a plotmeans() argument nor a graphical parameter, so it is omitted.
plotmeans(reservations ~ adType, data = data.df,
          xlab = "Ad Type", ylab = "Reservations",
          digits = 2, col = "black", ccol = "blue", barwidth = 2,
          mean.labels = TRUE)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
# One-way ANOVA: does the mean number of reservations differ by ad type?
oneWayfit <- aov(formula = reservations ~ adType, data = data.df)
print(summary(oneWayfit))
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 394228 197114 3885 <2e-16 ***
## Residuals 29997 1522018 51
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The p-value is < 0.05, so we conclude that there are significant differences in the number of reservations among the three ad types.
# Normality check on the ANOVA residuals: Q-Q plot first, then the
# Anderson-Darling test from the nortest package.
plot(oneWayfit, which = 2)
aovResiduals <- resid(oneWayfit)
library(nortest)
ad.test(aovResiduals)
##
## Anderson-Darling normality test
##
## data: aovResiduals
## A = 305.49, p-value < 2.2e-16
We reject the null hypothesis that the residuals are normally distributed (p-value < 0.05). So, we cannot assume the normality of the data.
# Residuals-vs-fitted diagnostic plot for the fitted ANOVA model.
plot(oneWayfit, which = 1)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Levene's test for equal variance of reservations across the ad types.
leveneTest(y = reservations ~ adType, data = data.df)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 2 134.29 < 2.2e-16 ***
## 29997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We reject the null hypothesis of equal variances (p-value < 0.05). We cannot assume the homogeneity of variance between the groups.
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Estimate the Box-Cox transformation parameter for the response variable
# and print the fitted transformation.
Trans <- BoxCoxTrans(data.df[["reservations"]])
print(Trans)
## Box-Cox Transformation
##
## 30000 data points used to estimate Lambda
##
## Input data summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.00 31.00 36.00 36.55 41.00 79.00
##
## Largest/Smallest: 5.27
## Sample Skewness: 0.777
##
## Estimated Lambda: -0.2
## With fudge factor, Lambda = 0 will be used for transformations
The estimated lambda (-0.2) is negative but lies within the fudge factor of zero. Hence, we use lambda = 0, i.e., the dependent variable will be log-transformed.
# Refit the one-way ANOVA on the log-transformed response; this overwrites
# the earlier `oneWayfit` object.
oneWayfit <- aov(formula = log(reservations) ~ adType, data = data.df)
print(summary(oneWayfit))
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 278.1 139.05 3845 <2e-16 ***
## Residuals 29997 1084.8 0.04
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise t-tests between ad types using non-pooled standard deviations
# (pool.sd = FALSE), with Benjamini-Hochberg adjustment of the p-values.
# The response and grouping vectors are passed directly; pairwise.t.test()
# has no `data` parameter, so none is supplied.
pairwise.t.test(data.df$reservations, data.df$adType,
                p.adjust.method = "BH", pool.sd = FALSE)
##
## Pairwise comparisons using t tests with non-pooled SD
##
## data: data.df$reservations and data.df$adType
##
## Curr Ads New Ads
## New Ads <2e-16 -
## No Ads 0.51 <2e-16
##
## P value adjustment method: BH
# Rank-based Kruskal-Wallis test of the log-transformed reservations
# across ad types.
kruskal.test(formula = log(reservations) ~ adType, data = data.df)
##
## Kruskal-Wallis rank sum test
##
## data: log(reservations) by adType
## Kruskal-Wallis chi-squared = 5856.4, df = 2, p-value < 2.2e-16
# One-way analysis of means on the log-transformed response without
# assuming equal variances (var.equal = FALSE is the default, spelled out).
oneway.test(formula = log(reservations) ~ adType, data = data.df,
            var.equal = FALSE)
##
## One-way analysis of means (not assuming equal variances)
##
## data: log(reservations) and adType
## F = 3892.4, num df = 2, denom df = 19993, p-value < 2.2e-16
The p-value is < 0.05, so we conclude that there are significant differences in the (log-transformed) number of reservations among the three ad types.