Load libraries & data
library(readr)
## Warning: package 'readr' was built under R version 3.6.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the restaurant advertising data set.
# stringsAsFactors is set explicitly: the factor (<fct>) columns shown in the
# summaries below depend on it, and the default changed to FALSE in R 4.0.0.
data <- read.csv("datasets/SubAdvData.csv", stringsAsFactors = TRUE)
# NOTE(review): attach() is kept only because later statements refer to bare
# column names (adType, reservations, ...); prefer data$col or with(data, ...).
attach(data)
Q.1a Write R code to display the following output regarding the number of restaurants
# Cross-tabulate ad type by restaurant type and append a "Sum" column
# (margin = 2 adds the total across restaurant types for each ad type).
with(
  data,
  addmargins(table(adType, restaurantType), margin = 2)
)
## restaurantType
## adType chain independent Sum
## Curr Ads 2023 2972 4995
## New Ads 1958 3010 4968
## No Ads 2003 3034 5037
Q.1b Write R code to display the following output regarding the percentages of restaurants
# Share of all restaurants in each ad type x restaurant type cell
# (no margin given, so every cell is divided by the grand total).
with(
  data,
  prop.table(table(adType, restaurantType))
)
## restaurantType
## adType chain independent
## Curr Ads 0.1348667 0.1981333
## New Ads 0.1305333 0.2006667
## No Ads 0.1335333 0.2022667
Q.1c Write R code to display the following output regarding average number of reservations
# Mean number of reservations for each restaurant type.
data %>%
  group_by(restaurantType) %>%
  summarise(reservations = mean(reservations))
## # A tibble: 2 x 2
## restaurantType reservations
## <fct> <dbl>
## 1 chain 42.6
## 2 independent 32.5
Q.1d Write R code to display the following output regarding average number of reservations
# Mean number of reservations for each advertisement type.
data %>%
  group_by(adType) %>%
  summarise(reservations = mean(reservations))
## # A tibble: 3 x 2
## adType reservations
## <fct> <dbl>
## 1 Curr Ads 34.0
## 2 New Ads 41.6
## 3 No Ads 34.0
Part 2
Q.2a Measure the correlation between the number of reservations and the number of phone calls received by the restaurants.
# Correlation coefficient between reservations and phone calls
# (cor() defaults to the Pearson method).
with(data, cor(x = reservations, y = phoneCalls))
## [1] 0.6516813
Q.2b Run Pearson’s product-moment correlation test for the number of reservations and the number of phone calls received by the restaurants.
# Pearson product-moment test of H0: the correlation between reservations
# and phone calls is zero.
with(
  data,
  cor.test(x = reservations, y = phoneCalls, method = "pearson")
)
##
## Pearson's product-moment correlation
##
## data: reservations and phoneCalls
## t = 105.22, df = 14998, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6423774 0.6607932
## sample estimates:
## cor
## 0.6516813
Q.2c Run a statistical test of whether the fraction of restaurants running current advertisements is 40%.
# One-sample proportion test (no continuity correction) of H0: the share of
# restaurants running current ads equals 0.4.
# The counts are derived from the data instead of being typed in by hand, so
# they stay correct if the data set changes.
# NOTE(review): the "data:" line of the printed result should now show these
# variable names rather than the literal counts.
n_curr_ads <- sum(data$adType == "Curr Ads")
n_restaurants <- nrow(data)
prop.test(x = n_curr_ads, n = n_restaurants, p = 0.4, correct = FALSE)
##
## 1-sample proportions test without continuity correction
##
## data: 4995 out of 15000, null probability 0.4
## X-squared = 280.56, df = 1, p-value < 2.2e-16
## alternative hypothesis: true p is not equal to 0.4
## 95 percent confidence interval:
## 0.3255016 0.3405839
## sample estimates:
## p
## 0.333
Q.2d Test whether the average number of reservations equals 40.00.
# One-sample t-test of H0: the mean number of reservations is 40.
with(data, t.test(x = reservations, mu = 40))
##
## One Sample t-test
##
## data: reservations
## t = -53.711, df = 14999, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 40
## 95 percent confidence interval:
## 36.39943 36.65297
## sample estimates:
## mean of x
## 36.5262
Q.2e Run a statistical test to compare the average number of reservations at chain restaurants with the average number of reservations at independent restaurants?
# Reservation counts for each restaurant type, kept as one-column data frames.
chain <- data %>%
  filter(restaurantType == "chain") %>%
  select(reservations)
independent <- data %>%
  filter(restaurantType == "independent") %>%
  select(reservations)

# Pooled-variance (var.equal = TRUE) two-sample t-test of H0: chain and
# independent restaurants have the same mean number of reservations.
# t.test() is documented to take numeric vectors, so the column is extracted
# explicitly instead of passing the data frames themselves.
# NOTE(review): the "data:" line of the printed result should now show the
# extracted expressions rather than "chain and independent".
t.test(
  chain[["reservations"]],
  independent[["reservations"]],
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: chain and independent
## t = 97.503, df = 14998, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 9.872633 10.277718
## sample estimates:
## mean of x mean of y
## 42.58205 32.50688