1 Frequency Table

# Count observations per ad type; the explicit name keeps the "adType" header.
table(adType = data.df$adType)
## adType
## Curr Ads  New Ads   No Ads 
##    10000    10000    10000

2.Descriptive statistics

library(psych)
# Descriptive statistics for every column, keeping only the location/spread
# columns (selected by name rather than by position).
summary_cols <- c("vars", "n", "mean", "sd", "median", "min", "max", "range")
describe(data.df)[, summary_cols]
##                 vars     n     mean      sd  median min   max range
## adType*            1 30000     2.00    0.82     2.0   1     3     2
## pageViews          2 30000   468.06  168.16   391.0 145   929   784
## phoneCalls         3 30000    37.71    7.97    37.0  17    77    60
## reservations       4 30000    36.55    7.99    36.0  15    79    64
## businessID         5 30000 15000.50 8660.40 15000.5   1 30000 29999
## restaurantType*    6 30000     1.60    0.49     2.0   1     2     1

3.Box plots

# Side-by-side box plots of reservations, one box per ad type.
boxplot(
  reservations ~ adType,
  data = data.df,
  xlab = "Ad Type",
  ylab = "Reservations",
  main = "Boxplot of Ad Type",
  digits = 2
)

4. Mean Plots

library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
# Plot the mean number of reservations per ad type with confidence-interval
# bars and the mean value printed next to each point.
# Only arguments that plotmeans() itself understands are passed: extra
# arguments are forwarded to low-level graphics calls, which warn about
# anything that is not a graphical parameter. `legends` is left at its
# default so the x axis is labelled with the ad-type names.
plotmeans(reservations ~ adType, data = data.df,
          xlab = "Ad Type", ylab = "Reservations",
          digits = 2, col = "black", ccol = "blue", barwidth = 2,
          mean.labels = TRUE)
## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, li, x, pmax(y - gap, li), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(x, ui, x, pmin(y + gap, ui), col = barcol, lwd = lwd, :
## zero-length arrow is of indeterminate angle and so skipped

5.Oneway ANOVA

# One-way ANOVA: does the mean number of reservations differ by ad type?
oneWayfit <- aov(
  formula = reservations ~ adType,
  data = data.df
)
summary(oneWayfit)
##                Df  Sum Sq Mean Sq F value Pr(>F)    
## adType          2  394228  197114    3885 <2e-16 ***
## Residuals   29997 1522018      51                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The p-value < 0.05. We can conclude - there are significant differences in number of reservations between three different ad types.

6.ANOVA Assumptions

  1. Normality of Residuals
# Diagnostic plot number 2 of the fitted model (normal Q-Q plot of residuals).
plot(oneWayfit, which = 2)

Anderson-Darling normality test

library(nortest)
# Extract the ANOVA residuals and run the Anderson-Darling normality test.
aovResiduals <- resid(oneWayfit)
ad.test(aovResiduals)
## 
##  Anderson-Darling normality test
## 
## data:  aovResiduals
## A = 305.49, p-value < 2.2e-16

The p-value is < 0.05, so we reject the null hypothesis that the residuals are normally distributed. Hence, we cannot assume normality of the residuals.

Homogeneity of Variance

# Diagnostic plot number 1 of the fitted model (residuals vs fitted values).
plot(oneWayfit, which = 1)

library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Levene's test for equal variances of reservations across ad types,
# centred on the group medians (the default, spelled out here).
leveneTest(
  reservations ~ adType,
  data = data.df,
  center = median
)
## Levene's Test for Homogeneity of Variance (center = median)
##          Df F value    Pr(>F)    
## group     2  134.29 < 2.2e-16 ***
##       29997                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The p-value is < 0.05, so we reject the null hypothesis of equal variances. We cannot assume homogeneity of variance between the groups.

Box-Cox Transformation

library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Estimate the Box-Cox lambda for the response and print the summary.
Trans <- BoxCoxTrans(data.df[["reservations"]])
print(Trans)
## Box-Cox Transformation
## 
## 30000 data points used to estimate Lambda
## 
## Input data summary:
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15.00   31.00   36.00   36.55   41.00   79.00 
## 
## Largest/Smallest: 5.27 
## Sample Skewness: 0.777 
## 
## Estimated Lambda: -0.2 
## With fudge factor, Lambda = 0 will be used for transformations

The estimated lambda is -0.2, which falls within the fudge factor around zero, so λ = 0 is used. That is, the dependent variable is log-transformed.

Oneway ANOVA after Box-Cox Transformation

# Refit the one-way ANOVA on the log-transformed response (Box-Cox lambda = 0).
oneWayfit <- aov(
  formula = log(reservations) ~ adType,
  data = data.df
)
summary(oneWayfit)
##                Df Sum Sq Mean Sq F value Pr(>F)    
## adType          2  278.1  139.05    3845 <2e-16 ***
## Residuals   29997 1084.8    0.04                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

7. Pairwise t-test

# Pairwise t-tests between ad types without pooling the standard deviation,
# with Benjamini-Hochberg adjusted p-values. The response and grouping
# vectors are passed directly; pairwise.t.test() has no `data` parameter.
pairwise.t.test(data.df$reservations, data.df$adType,
                p.adjust.method = "BH", pool.sd = FALSE)
## 
##  Pairwise comparisons using t tests with non-pooled SD 
## 
## data:  data.df$reservations and data.df$adType 
## 
##         Curr Ads New Ads
## New Ads <2e-16   -      
## No Ads  0.51     <2e-16 
## 
## P value adjustment method: BH

8. When the normality assumption is not met

# Rank-based alternative to the one-way ANOVA when residuals are not normal.
kruskal.test(
  log(reservations) ~ adType,
  data = data.df
)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  log(reservations) by adType
## Kruskal-Wallis chi-squared = 5856.4, df = 2, p-value < 2.2e-16

9. When Variances are not equal

# One-way analysis of means that does not assume equal group variances
# (var.equal = FALSE is the default, spelled out here).
oneway.test(
  log(reservations) ~ adType,
  data = data.df,
  var.equal = FALSE
)
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  log(reservations) and adType
## F = 3892.4, num df = 2, denom df = 19993, p-value < 2.2e-16

The p-value < 0.05. We can conclude - there are significant differences in number of reservations between three different ad types.