# ANOVA on class

# Load the advertising data set; the path is relative to the working directory.
# stringsAsFactors = TRUE keeps adType and restaurantType as factors on
# R >= 4.0 as well, where read.csv() no longer converts strings by default.
df <- read.csv("AdvertisingDataV2.csv", stringsAsFactors = TRUE)
# NOTE(review): attach() is kept so that code referring to columns by bare
# name continues to work; prefer `data = df` or with(df, ...) in new code.
attach(df)

# Sanity-check the import: number of rows and columns.
dim(df)

## [1] 30000     6

# Per-column summary statistics (level counts for the factor columns).
summary(df)

##       adType        pageViews       phoneCalls     reservations  
##  Curr Ads:10000   Min.   :145.0   Min.   :17.00   Min.   :15.00  
##  New Ads :10000   1st Qu.:328.0   1st Qu.:32.00   1st Qu.:31.00  
##  No Ads  :10000   Median :391.0   Median :37.00   Median :36.00  
##                   Mean   :468.1   Mean   :37.71   Mean   :36.55  
##                   3rd Qu.:636.0   3rd Qu.:42.00   3rd Qu.:41.00  
##                   Max.   :929.0   Max.   :77.00   Max.   :79.00  
##    businessID        restaurantType 
##  Min.   :    1   chain      :12000  
##  1st Qu.: 7501   independent:18000  
##  Median :15000                      
##  Mean   :15000                      
##  3rd Qu.:22500                      
##  Max.   :30000

# Inspect the type and the first few values of every column.
str(df)

## 'data.frame':    30000 obs. of  6 variables:
##  $ adType        : Factor w/ 3 levels "Curr Ads","New Ads",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ pageViews     : int  643 621 581 592 648 519 583 659 507 577 ...
##  $ phoneCalls    : int  44 41 40 35 45 37 47 37 40 41 ...
##  $ reservations  : int  39 44 38 31 46 41 42 42 30 35 ...
##  $ businessID    : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ restaurantType: Factor w/ 2 levels "chain","independent": 1 1 1 1 1 1 1 1 1 1 ...

library("ggpubr")

## Warning: package 'ggpubr' was built under R version 3.5.3

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 3.5.3

## Loading required package: magrittr

# Line plot of reservations by ad type: the "mean_se" summary drawn together
# with the jittered individual observations ("jitter").
ggline(
  data = df,
  x = "adType",
  y = "reservations",
  add = c("mean_se", "jitter"),
  xlab = "adType",
  ylab = "reservations"
)

# Show the levels and counts
library(dplyr)

## Warning: package 'dplyr' was built under R version 3.5.3

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Sample size, mean and standard deviation of reservations per ad type.
df %>%
  group_by(adType) %>%
  summarise(
    count = n(),
    mean = mean(reservations, na.rm = TRUE),
    sd = sd(reservations, na.rm = TRUE)
  )

## # A tibble: 3 x 4
##   adType   count  mean    sd
##   <fct>    <int> <dbl> <dbl>
## 1 Curr Ads 10000  34.0  6.50
## 2 New Ads  10000  41.7  8.15
## 3 No Ads   10000  34.0  6.59

# One-way ANOVA: does mean reservations differ between ad types?
res.aov <- aov(
  formula = reservations ~ adType,
  data = df
)

# Print the ANOVA table (Df, sums of squares, F statistic, p-value).
summary(res.aov)

##                Df  Sum Sq Mean Sq F value Pr(>F)    
## adType          2  394228  197114    3885 <2e-16 ***
## Residuals   29997 1522018      51                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Two-way interaction plot: mean reservations for each ad type, with one
# trace per restaurant type.
# with(df, ...) looks the columns up in `df` explicitly instead of relying on
# an earlier attach(df), so the call cannot silently pick up same-named
# objects from the global environment.
with(
  df,
  interaction.plot(
    x.factor = adType,
    trace.factor = restaurantType,
    response = reservations,
    type = "b",
    col = 2:3,
    leg.bty = "o",
    leg.bg = "beige",
    lwd = 2,
    pch = c(18, 24),
    xlab = "adType",
    ylab = "reservations",
    main = "Interaction plot"
  )
)

library(dplyr)
# Sample size, mean and standard deviation of reservations for every
# adType x restaurantType cell.
# na.rm = TRUE matches the one-way summary, so a single missing value cannot
# turn a whole cell's mean/sd into NA.
group_by(df, adType, restaurantType) %>%
  summarise(
    count = n(),
    mean = mean(reservations, na.rm = TRUE),
    sd = sd(reservations, na.rm = TRUE)
  )

## # A tibble: 6 x 5
## # Groups:   adType [3]
##   adType   restaurantType count  mean    sd
##   <fct>    <fct>          <int> <dbl> <dbl>
## 1 Curr Ads chain           4000  40.1  5.03
## 2 Curr Ads independent     6000  30.0  3.55
## 3 New Ads  chain           4000  48.0  8.60
## 4 New Ads  independent     6000  37.5  4.13
## 5 No Ads   chain           4000  39.9  5.03
## 6 No Ads   independent     6000  30.0  4.00

# Two-way ANOVA with interaction. `restaurantType * adType` is formula
# shorthand for both main effects plus their interaction, i.e.
# restaurantType + adType + restaurantType:adType, in that term order.
res.aov2 <- aov(
  formula = reservations ~ restaurantType * adType,
  data = df
)
summary(res.aov2)

##                          Df Sum Sq Mean Sq   F value   Pr(>F)    
## restaurantType            1 749570  749570 29122.292  < 2e-16 ***
## adType                    2 394228  197114  7658.285  < 2e-16 ***
## restaurantType:adType     2    442     221     8.594 0.000186 ***
## Residuals             29994 772006      26                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# ANOVA on class

# Abhijeet Hansda

# 03/10/2019