# Load the advertising data set.
# stringsAsFactors = TRUE is spelled out because R >= 4.0.0 no longer converts
# character columns to factors by default; adType and restaurantType are
# handled as factors in the output recorded below.
df <- read.csv("AdvertisingDataV2.csv", stringsAsFactors = TRUE)
# NOTE(review): attach() is kept in case code elsewhere in this script refers
# to the columns by bare name; prefer with(df, ...) or `data = df` in new code.
attach(df)
# Report the number of rows and columns.
dim(df)
## [1] 30000 6
# Per-column descriptive statistics: quartiles and mean for numeric columns,
# level counts for factor columns.
summary(object = df)
## adType pageViews phoneCalls reservations
## Curr Ads:10000 Min. :145.0 Min. :17.00 Min. :15.00
## New Ads :10000 1st Qu.:328.0 1st Qu.:32.00 1st Qu.:31.00
## No Ads :10000 Median :391.0 Median :37.00 Median :36.00
## Mean :468.1 Mean :37.71 Mean :36.55
## 3rd Qu.:636.0 3rd Qu.:42.00 3rd Qu.:41.00
## Max. :929.0 Max. :77.00 Max. :79.00
## businessID restaurantType
## Min. : 1 chain :12000
## 1st Qu.: 7501 independent:18000
## Median :15000
## Mean :15000
## 3rd Qu.:22500
## Max. :30000
# Compact overview of every column: its type and first few values.
str(object = df)
## 'data.frame': 30000 obs. of 6 variables:
## $ adType : Factor w/ 3 levels "Curr Ads","New Ads",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ pageViews : int 643 621 581 592 648 519 583 659 507 577 ...
## $ phoneCalls : int 44 41 40 35 45 37 47 37 40 41 ...
## $ reservations : int 39 44 38 31 46 41 42 42 30 35 ...
## $ businessID : int 1 2 3 4 5 6 7 8 9 10 ...
## $ restaurantType: Factor w/ 2 levels "chain","independent": 1 1 1 1 1 1 1 1 1 1 ...
library("ggpubr")
## Warning: package 'ggpubr' was built under R version 3.5.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
## Loading required package: magrittr
# Line plot of reservations by ad type: group means with standard-error bars
# ("mean_se") plus the individual observations as jittered points ("jitter").
ggline(
  data = df,
  x = "adType",
  y = "reservations",
  xlab = "adType",
  ylab = "reservations",
  add = c("mean_se", "jitter")
)

# Show each ad type's group size, mean, and standard deviation of reservations
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Per-ad-type summary of reservations (missing values ignored).
df %>%
  group_by(adType) %>%
  summarise(
    count = n(),
    mean = mean(reservations, na.rm = TRUE),
    sd = sd(reservations, na.rm = TRUE)
  )
## # A tibble: 3 x 4
## adType count mean sd
## <fct> <int> <dbl> <dbl>
## 1 Curr Ads 10000 34.0 6.50
## 2 New Ads 10000 41.7 8.15
## 3 No Ads 10000 34.0 6.59
# One-way ANOVA: model reservations as a function of ad type.
res.aov <- aov(formula = reservations ~ adType, data = df)
# Print the ANOVA table for the fitted model.
summary(object = res.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## adType 2 394228 197114 3885 <2e-16 ***
## Residuals 29997 1522018 51
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-way interaction plot: reservations summarised per ad type (x axis), one
# trace per restaurant type.
# The call is evaluated inside with(df, ...) so the columns are taken from df
# itself rather than from whatever attach() or the global environment happens
# to provide under the same names. Bare column symbols are kept so the default
# legend title is still "restaurantType".
with(
  df,
  interaction.plot(
    x.factor = adType,
    trace.factor = restaurantType,
    response = reservations,
    type = "b",
    col = 2:3,
    lwd = 2,
    pch = c(18, 24),
    leg.bty = "o",
    leg.bg = "beige",
    xlab = "adType",
    ylab = "reservations",
    main = "Interaction plot"
  )
)

library(dplyr)
# Group size, mean, and standard deviation of reservations for every
# ad type x restaurant type combination.
# na.rm = TRUE matches the per-ad-type summary earlier in this script, so a
# missing reservation value cannot turn a cell's mean or sd into NA.
group_by(df, adType, restaurantType) %>%
  summarise(count = n(),
            mean = mean(reservations, na.rm = TRUE),
            sd = sd(reservations, na.rm = TRUE))
## # A tibble: 6 x 5
## # Groups: adType [3]
## adType restaurantType count mean sd
## <fct> <fct> <int> <dbl> <dbl>
## 1 Curr Ads chain 4000 40.1 5.03
## 2 Curr Ads independent 6000 30.0 3.55
## 3 New Ads chain 4000 48.0 8.60
## 4 New Ads independent 6000 37.5 4.13
## 5 No Ads chain 4000 39.9 5.03
## 6 No Ads independent 6000 30.0 4.00
# Two-way ANOVA: reservations modelled on restaurant type, ad type, and the
# restaurant type x ad type interaction.
res.aov2 <- aov(
  formula = reservations ~ restaurantType + adType + restaurantType:adType,
  data = df
)
# Print the ANOVA table for the two-way model.
summary(object = res.aov2)
## Df Sum Sq Mean Sq F value Pr(>F)
## restaurantType 1 749570 749570 29122.292 < 2e-16 ***
## adType 2 394228 197114 7658.285 < 2e-16 ***
## restaurantType:adType 2 442 221 8.594 0.000186 ***
## Residuals 29994 772006 26
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1